sqlite-vec/tests/fuzz/rescore-quantize-edge.c

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>

/* Test wrappers from sqlite-vec-rescore.c (SQLITE_VEC_TEST build) */
extern void _test_rescore_quantize_float_to_bit(const float *src, uint8_t *dst, size_t dim);
extern void _test_rescore_quantize_float_to_int8(const float *src, int8_t *dst, size_t dim);
extern size_t _test_rescore_quantized_byte_size_bit(size_t dimensions);
extern size_t _test_rescore_quantized_byte_size_int8(size_t dimensions);

/**
 * Fuzz target: edge cases in rescore quantization functions.
 *
 * The existing rescore-quantize.c only tests dimensions that are multiples of 8
 * and never passes special float values. This target:
 *
 * - Tests rescore_quantized_byte_size with arbitrary dimension values
 *   (including 0, 1, 7, MAX values -- looking for integer division issues)
 * - Passes raw float reinterpretation of fuzz bytes (NaN, Inf, denormals,
 *   negative zero -- these are the values that break min/max/range logic)
 * - Tests the int8 quantizer with all-identical values (range=0 branch)
 * - Tests the int8 quantizer with extreme ranges (overflow in scale calc)
 * - Tests bit quantizer with exact float threshold (0.0f boundary)
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size < 8) return 0;

  uint8_t mode = data[0] % 5;
  data++; size--;

  switch (mode) {
    case 0: {
      /* Test rescore_quantized_byte_size with fuzz-controlled dimensions.
         This function does dimensions / CHAR_BIT for bit, dimensions for int8.
         We're checking it doesn't do anything weird with edge values. */
      if (size < sizeof(size_t)) return 0;
      size_t dim;
      memcpy(&dim, data, sizeof(dim));

      /* These should never crash, just return values */
      size_t bit_size = _test_rescore_quantized_byte_size_bit(dim);
      size_t int8_size = _test_rescore_quantized_byte_size_int8(dim);

      /* Verify basic invariants */
      (void)bit_size;
      (void)int8_size;
      break;
    }

    case 1: {
      /* Bit quantize with raw reinterpreted floats (NaN, Inf, denormal).
         The key check: src[i] >= 0.0f -- NaN comparison is always false,
         so NaN should produce 0-bits. But denormals and -0.0f are tricky. */
      size_t num_floats = size / sizeof(float);
      if (num_floats == 0) return 0;
      /* Round to multiple of 8 for bit quantizer */
      size_t dim = (num_floats / 8) * 8;
      if (dim == 0) return 0;

      const float *src = (const float *)data;
      size_t bit_bytes = dim / 8;
      uint8_t *dst = (uint8_t *)malloc(bit_bytes);
      if (!dst) return 0;

      _test_rescore_quantize_float_to_bit(src, dst, dim);

      /* Verify: for each bit, if src >= 0 then bit should be set */
      for (size_t i = 0; i < dim; i++) {
        int bit_set = (dst[i / 8] >> (i % 8)) & 1;
        if (src[i] >= 0.0f) {
          assert(bit_set == 1);
        } else if (src[i] < 0.0f) {
          /* Definitely negative -- bit must be 0 */
          assert(bit_set == 0);
        }
        /* NaN: comparison is false, so bit_set should be 0 */
      }

      free(dst);
      break;
    }

    case 2: {
      /* Int8 quantize with raw reinterpreted floats.
         The dangerous paths:
         - All values identical (range == 0) -> memset path
         - vmin/vmax with NaN (NaN < anything is false, NaN > anything is false)
         - Extreme range causing scale = 255/range to be Inf or 0
         - denormals near the clamping boundaries */
      size_t num_floats = size / sizeof(float);
      if (num_floats == 0) return 0;

      const float *src = (const float *)data;
      int8_t *dst = (int8_t *)malloc(num_floats);
      if (!dst) return 0;

      _test_rescore_quantize_float_to_int8(src, dst, num_floats);

      /* Output must always be in [-128, 127] (trivially true for int8_t,
         but check the actual clamping logic worked) */
      for (size_t i = 0; i < num_floats; i++) {
        assert(dst[i] >= -128 && dst[i] <= 127);
      }

      free(dst);
      break;
    }

    case 3: {
      /* Int8 quantize stress: all-same values (range=0 branch) */
      size_t dim = (size < 64) ? size : 64;
      if (dim == 0) return 0;

      float *src = (float *)malloc(dim * sizeof(float));
      int8_t *dst = (int8_t *)malloc(dim);
      if (!src || !dst) { free(src); free(dst); return 0; }

      /* Fill with a single value derived from fuzz data */
      float val;
      memcpy(&val, data, sizeof(float) < size ? sizeof(float) : size);
      for (size_t i = 0; i < dim; i++) src[i] = val;

      _test_rescore_quantize_float_to_int8(src, dst, dim);

      /* All outputs should be 0 when range == 0 */
      for (size_t i = 0; i < dim; i++) {
        assert(dst[i] == 0);
      }

      free(src);
      free(dst);
      break;
    }

    case 4: {
      /* Int8 quantize with extreme range: one huge positive, one huge negative.
         Tests scale = 255.0f / range overflow to Inf, then v * Inf = Inf,
         then clamping to [-128, 127]. */
      if (size < 2 * sizeof(float)) return 0;

      float extreme[2];
      memcpy(extreme, data, 2 * sizeof(float));

      /* Only test if both are finite (NaN/Inf tested in case 2) */
      if (!isfinite(extreme[0]) || !isfinite(extreme[1])) return 0;

      /* Build a vector with these two extreme values plus some fuzz */
      size_t dim = 16;
      float src[16];
      src[0] = extreme[0];
      src[1] = extreme[1];
      for (size_t i = 2; i < dim; i++) {
        if (2 * sizeof(float) + (i - 2) < size) {
          src[i] = (float)((int8_t)data[2 * sizeof(float) + (i - 2)]) * 1000.0f;
        } else {
          src[i] = 0.0f;
        }
      }

      int8_t dst[16];
      _test_rescore_quantize_float_to_int8(src, dst, dim);

      for (size_t i = 0; i < dim; i++) {
        assert(dst[i] >= -128 && dst[i] <= 127);
      }
      break;
    }
  }

  return 0;
}
Add rescore index for ANN queries Add rescore index type: stores full-precision float vectors in a rowid-keyed shadow table, quantizes to int8 for fast initial scan, then rescores top candidates with original vectors. Includes config parser, shadow table management, insert/delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_RESCORE), fuzz targets, and tests. 2026-03-29 19:45:54 -07:00			`#include <stdint.h>`
			`#include <stddef.h>`
			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <math.h>`
			`#include "sqlite-vec.h"`
			`#include "sqlite3.h"`
			`#include <assert.h>`

			`/* Test wrappers from sqlite-vec-rescore.c (SQLITE_VEC_TEST build) */`
			`extern void _test_rescore_quantize_float_to_bit(const float src, uint8_t dst, size_t dim);`
			`extern void _test_rescore_quantize_float_to_int8(const float src, int8_t dst, size_t dim);`
			`extern size_t _test_rescore_quantized_byte_size_bit(size_t dimensions);`
			`extern size_t _test_rescore_quantized_byte_size_int8(size_t dimensions);`

			`/**`
			`* Fuzz target: edge cases in rescore quantization functions.`
			`*`
			`* The existing rescore-quantize.c only tests dimensions that are multiples of 8`
			`* and never passes special float values. This target:`
			`*`
			`* - Tests rescore_quantized_byte_size with arbitrary dimension values`
			`* (including 0, 1, 7, MAX values -- looking for integer division issues)`
			`* - Passes raw float reinterpretation of fuzz bytes (NaN, Inf, denormals,`
			`* negative zero -- these are the values that break min/max/range logic)`
			`* - Tests the int8 quantizer with all-identical values (range=0 branch)`
			`* - Tests the int8 quantizer with extreme ranges (overflow in scale calc)`
			`* - Tests bit quantizer with exact float threshold (0.0f boundary)`
			`*/`
			`int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {`
			`if (size < 8) return 0;`

			`uint8_t mode = data[0] % 5;`
			`data++; size--;`

			`switch (mode) {`
			`case 0: {`
			`/* Test rescore_quantized_byte_size with fuzz-controlled dimensions.`
			`This function does dimensions / CHAR_BIT for bit, dimensions for int8.`
			`We're checking it doesn't do anything weird with edge values. */`
			`if (size < sizeof(size_t)) return 0;`
			`size_t dim;`
			`memcpy(&dim, data, sizeof(dim));`

			`/* These should never crash, just return values */`
			`size_t bit_size = _test_rescore_quantized_byte_size_bit(dim);`
			`size_t int8_size = _test_rescore_quantized_byte_size_int8(dim);`

			`/* Verify basic invariants */`
			`(void)bit_size;`
			`(void)int8_size;`
			`break;`
			`}`

			`case 1: {`
			`/* Bit quantize with raw reinterpreted floats (NaN, Inf, denormal).`
			`The key check: src[i] >= 0.0f -- NaN comparison is always false,`
			`so NaN should produce 0-bits. But denormals and -0.0f are tricky. */`
			`size_t num_floats = size / sizeof(float);`
			`if (num_floats == 0) return 0;`
			`/* Round to multiple of 8 for bit quantizer */`
			`size_t dim = (num_floats / 8) * 8;`
			`if (dim == 0) return 0;`

			`const float src = (const float )data;`
			`size_t bit_bytes = dim / 8;`
			`uint8_t dst = (uint8_t )malloc(bit_bytes);`
			`if (!dst) return 0;`

			`_test_rescore_quantize_float_to_bit(src, dst, dim);`

			`/* Verify: for each bit, if src >= 0 then bit should be set */`
			`for (size_t i = 0; i < dim; i++) {`
			`int bit_set = (dst[i / 8] >> (i % 8)) & 1;`
			`if (src[i] >= 0.0f) {`
			`assert(bit_set == 1);`
			`} else if (src[i] < 0.0f) {`
			`/* Definitely negative -- bit must be 0 */`
			`assert(bit_set == 0);`
			`}`
			`/* NaN: comparison is false, so bit_set should be 0 */`
			`}`

			`free(dst);`
			`break;`
			`}`

			`case 2: {`
			`/* Int8 quantize with raw reinterpreted floats.`
			`The dangerous paths:`
			`- All values identical (range == 0) -> memset path`
			`- vmin/vmax with NaN (NaN < anything is false, NaN > anything is false)`
			`- Extreme range causing scale = 255/range to be Inf or 0`
			`- denormals near the clamping boundaries */`
			`size_t num_floats = size / sizeof(float);`
			`if (num_floats == 0) return 0;`

			`const float src = (const float )data;`
			`int8_t dst = (int8_t )malloc(num_floats);`
			`if (!dst) return 0;`

			`_test_rescore_quantize_float_to_int8(src, dst, num_floats);`

			`/* Output must always be in [-128, 127] (trivially true for int8_t,`
			`but check the actual clamping logic worked) */`
			`for (size_t i = 0; i < num_floats; i++) {`
			`assert(dst[i] >= -128 && dst[i] <= 127);`
			`}`

			`free(dst);`
			`break;`
			`}`

			`case 3: {`
			`/* Int8 quantize stress: all-same values (range=0 branch) */`
			`size_t dim = (size < 64) ? size : 64;`
			`if (dim == 0) return 0;`

			`float src = (float )malloc(dim * sizeof(float));`
			`int8_t dst = (int8_t )malloc(dim);`
			`if (!src \|\| !dst) { free(src); free(dst); return 0; }`

			`/* Fill with a single value derived from fuzz data */`
			`float val;`
			`memcpy(&val, data, sizeof(float) < size ? sizeof(float) : size);`
			`for (size_t i = 0; i < dim; i++) src[i] = val;`

			`_test_rescore_quantize_float_to_int8(src, dst, dim);`

			`/* All outputs should be 0 when range == 0 */`
			`for (size_t i = 0; i < dim; i++) {`
			`assert(dst[i] == 0);`
			`}`

			`free(src);`
			`free(dst);`
			`break;`
			`}`

			`case 4: {`
			`/* Int8 quantize with extreme range: one huge positive, one huge negative.`
			`Tests scale = 255.0f / range overflow to Inf, then v * Inf = Inf,`
			`then clamping to [-128, 127]. */`
			`if (size < 2 * sizeof(float)) return 0;`

			`float extreme[2];`
			`memcpy(extreme, data, 2 * sizeof(float));`

			`/* Only test if both are finite (NaN/Inf tested in case 2) */`
			`if (!isfinite(extreme[0]) \|\| !isfinite(extreme[1])) return 0;`

			`/* Build a vector with these two extreme values plus some fuzz */`
			`size_t dim = 16;`
			`float src[16];`
			`src[0] = extreme[0];`
			`src[1] = extreme[1];`
			`for (size_t i = 2; i < dim; i++) {`
			`if (2 * sizeof(float) + (i - 2) < size) {`
			`src[i] = (float)((int8_t)data[2 * sizeof(float) + (i - 2)]) * 1000.0f;`
			`} else {`
			`src[i] = 0.0f;`
			`}`
			`}`

			`int8_t dst[16];`
			`_test_rescore_quantize_float_to_int8(src, dst, dim);`

			`for (size_t i = 0; i < dim; i++) {`
			`assert(dst[i] >= -128 && dst[i] <= 127);`
			`}`
			`break;`
			`}`
			`}`

			`return 0;`
			`}`