Add rescore index for ANN queries

Add rescore index type: stores full-precision float vectors in a rowid-keyed
shadow table, quantizes to int8 for fast initial scan, then rescores top
candidates with original vectors. Includes config parser, shadow table
management, insert/delete support, KNN integration, compile flag
(SQLITE_VEC_ENABLE_RESCORE), fuzz targets, and tests.
This commit is contained in:
Alex Garcia 2026-03-29 19:45:54 -07:00
parent bf2455f2ba
commit ba0db0b6d6
19 changed files with 3378 additions and 8 deletions

View file

@ -1,2 +1,7 @@
*.dSYM
targets/
corpus/
crash-*
leak-*
timeout-*
*.log

View file

@ -72,10 +72,34 @@ $(TARGET_DIR)/vec_mismatch: vec-mismatch.c $(FUZZ_SRCS) | $(TARGET_DIR)
$(TARGET_DIR)/vec0_delete_completeness: vec0-delete-completeness.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@

# Rescore fuzz targets. All of them are built with -DSQLITE_VEC_ENABLE_RESCORE.
# The two quantize targets additionally pass -DSQLITE_VEC_TEST because they
# call the _test_rescore_* wrappers directly.
$(TARGET_DIR)/rescore_operations: rescore-operations.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE $(FUZZ_SRCS) $< -o $@

$(TARGET_DIR)/rescore_create: rescore-create.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE $(FUZZ_SRCS) $< -o $@

$(TARGET_DIR)/rescore_quantize: rescore-quantize.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE -DSQLITE_VEC_TEST $(FUZZ_SRCS) $< -o $@

$(TARGET_DIR)/rescore_shadow_corrupt: rescore-shadow-corrupt.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE $(FUZZ_SRCS) $< -o $@

$(TARGET_DIR)/rescore_knn_deep: rescore-knn-deep.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE $(FUZZ_SRCS) $< -o $@

$(TARGET_DIR)/rescore_quantize_edge: rescore-quantize-edge.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE -DSQLITE_VEC_TEST $(FUZZ_SRCS) $< -o $@

$(TARGET_DIR)/rescore_interleave: rescore-interleave.c $(FUZZ_SRCS) | $(TARGET_DIR)
	$(FUZZ_CC) $(FUZZ_CFLAGS) -DSQLITE_VEC_ENABLE_RESCORE $(FUZZ_SRCS) $< -o $@

# Every name listed here must have a matching $(TARGET_DIR)/<name> rule.
# Each continued line needs a trailing backslash; a missing one silently
# truncates the list and turns the following lines into a parse error.
FUZZ_TARGETS = vec0_create exec json numpy \
               shadow_corrupt vec0_operations scalar_functions \
               vec0_create_full metadata_columns vec_each vec_mismatch \
               vec0_delete_completeness \
               rescore_operations rescore_create rescore_quantize \
               rescore_shadow_corrupt rescore_knn_deep \
               rescore_quantize_edge rescore_interleave

all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS))

View file

@ -0,0 +1,36 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/**
 * Fuzz target: splice the raw fuzz input into the option list of a
 * `rescore(...)` index clause and attempt to create the virtual table.
 *
 * The input is formatted with "%.*s", so at most `size` bytes are used.
 * A statement that fails to prepare is simply skipped; the target only
 * cares that neither preparing nor stepping the CREATE crashes.
 *
 * @param data  fuzz bytes, used as the text between `rescore(` and `))`
 * @param size  number of bytes available in `data`
 * @return always 0
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  sqlite3 *conn;
  sqlite3_stmt *create_stmt;
  int status;

  status = sqlite3_open(":memory:", &conn);
  assert(status == SQLITE_OK);
  status = sqlite3_vec_init(conn, NULL, NULL);
  assert(status == SQLITE_OK);

  /* Assemble: <fixed prefix> <fuzz bytes> <closing parens>. */
  sqlite3_str *builder = sqlite3_str_new(NULL);
  assert(builder);
  sqlite3_str_appendall(builder, "CREATE VIRTUAL TABLE v USING vec0(emb float[128] indexed by rescore(");
  sqlite3_str_appendf(builder, "%.*s", (int)size, data);
  sqlite3_str_appendall(builder, "))");

  char *sql_text = sqlite3_str_finish(builder);
  assert(sql_text);

  status = sqlite3_prepare_v2(conn, sql_text, -1, &create_stmt, NULL);
  /* The SQL text is no longer needed once the statement is prepared. */
  sqlite3_free(sql_text);

  if (status == SQLITE_OK)
    sqlite3_step(create_stmt);

  sqlite3_finalize(create_stmt);
  sqlite3_close(conn);
  return 0;
}

View file

@ -0,0 +1,20 @@
"rescore"
"quantizer"
"bit"
"int8"
"oversample"
"indexed"
"by"
"float"
"("
")"
","
"="
"["
"]"
"1"
"8"
"16"
"128"
"256"
"1024"

View file

@ -0,0 +1,151 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/**
 * Fuzz target: interleaved insert/update/delete/KNN operations on rescore
 * tables with BOTH quantizer types, exercising the int8 quantizer path
 * and the update code path that the existing rescore-operations.c misses.
 *
 * Key differences from rescore-operations.c:
 * - Tests BOTH bit and int8 quantizers (the existing target only tests bit)
 * - Fuzz-controlled query vectors (not fixed [1,0,0,...])
 * - Exercises the UPDATE path
 * - Tests with 16 dimensions instead of 8 (exercises more of the
 *   quantization loop)
 * - Interleaves KNN between mutations to stress the blob_reopen path
 *   when _rescore_vectors rows have been deleted/modified
 *
 * Input layout:
 *   byte 0        low bit selects the quantizer (1 = int8, 0 = bit)
 *   then repeated [op byte][rowid byte][up to 16 vector bytes]
 *                 op = byte % 5, rowid = (byte % 24) + 1; the vector bytes
 *                 are only consumed by ops 0, 2, 3 and 4.
 */

/*
 * Decode up to 16 components from the fuzz stream into `out`.
 * Each component is one signed byte divided by `divisor`. `*pos` is advanced
 * past every byte consumed. Components for which no input is left are not
 * written, so the caller must pre-zero `out`.
 * Returns the number of components written.
 */
static size_t interleave_read_vec16(const uint8_t *data, size_t size,
                                    size_t *pos, float divisor,
                                    float out[16]) {
  size_t filled = 0;
  while (filled < 16 && *pos < size) {
    out[filled] = (float)((int8_t)data[*pos]) / divisor;
    filled++;
    (*pos)++;
  }
  return filled;
}

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size < 8) return 0;

  int rc;
  sqlite3 *db;
  sqlite3_stmt *stmtInsert = NULL;
  sqlite3_stmt *stmtUpdate = NULL;
  sqlite3_stmt *stmtDelete = NULL;
  sqlite3_stmt *stmtKnn = NULL;

  rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  /* Use first byte to pick quantizer */
  int use_int8 = data[0] & 1;
  data++;
  size--;

  const char *create_sql = use_int8
      ? "CREATE VIRTUAL TABLE v USING vec0("
        "emb float[16] indexed by rescore(quantizer=int8))"
      : "CREATE VIRTUAL TABLE v USING vec0("
        "emb float[16] indexed by rescore(quantizer=bit))";
  rc = sqlite3_exec(db, create_sql, NULL, NULL, NULL);
  if (rc != SQLITE_OK) {
    sqlite3_close(db);
    return 0;
  }

  sqlite3_prepare_v2(db,
      "INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
  sqlite3_prepare_v2(db,
      "UPDATE v SET emb = ? WHERE rowid = ?", -1, &stmtUpdate, NULL);
  sqlite3_prepare_v2(db,
      "DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
  sqlite3_prepare_v2(db,
      "SELECT rowid, distance FROM v WHERE emb MATCH ? "
      "ORDER BY distance LIMIT 5", -1, &stmtKnn, NULL);
  if (!stmtInsert || !stmtUpdate || !stmtDelete || !stmtKnn)
    goto cleanup;

  size_t i = 0;
  while (i + 2 <= size) {
    uint8_t op = data[i++] % 5;
    uint8_t rowid_byte = data[i++];
    int64_t rowid = (int64_t)(rowid_byte % 24) + 1;

    switch (op) {
    case 0: {
      /* INSERT */
      float vec[16] = {0};
      interleave_read_vec16(data, size, &i, 8.0f, vec);
      sqlite3_reset(stmtInsert);
      sqlite3_bind_int64(stmtInsert, 1, rowid);
      sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
      sqlite3_step(stmtInsert);
      break;
    }
    case 1: {
      /* DELETE */
      sqlite3_reset(stmtDelete);
      sqlite3_bind_int64(stmtDelete, 1, rowid);
      sqlite3_step(stmtDelete);
      break;
    }
    case 2: {
      /* KNN with fuzz-controlled query vector */
      float qvec[16] = {0};
      interleave_read_vec16(data, size, &i, 4.0f, qvec);
      sqlite3_reset(stmtKnn);
      /* qvec dies at the end of this case while stmtKnn lives on, so the
         binding must own a copy: SQLITE_STATIC would leave the statement
         holding a pointer to a dead stack slot. */
      sqlite3_bind_blob(stmtKnn, 1, qvec, sizeof(qvec), SQLITE_TRANSIENT);
      while (sqlite3_step(stmtKnn) == SQLITE_ROW) {
        (void)sqlite3_column_int64(stmtKnn, 0);
        (void)sqlite3_column_double(stmtKnn, 1);
      }
      break;
    }
    case 3: {
      /* UPDATE: modify an existing vector (exercises rescore update path) */
      float vec[16] = {0};
      interleave_read_vec16(data, size, &i, 6.0f, vec);
      sqlite3_reset(stmtUpdate);
      sqlite3_bind_blob(stmtUpdate, 1, vec, sizeof(vec), SQLITE_TRANSIENT);
      sqlite3_bind_int64(stmtUpdate, 2, rowid);
      sqlite3_step(stmtUpdate);
      break;
    }
    case 4: {
      /* INSERT then immediately UPDATE same row (stresses blob lifecycle) */
      float vec1[16] = {0};
      float vec2[16] = {0};
      size_t filled = interleave_read_vec16(data, size, &i, 10.0f, vec1);
      /* Opposite direction; only negate decoded components so that padding
         stays +0.0f rather than becoming -0.0f. */
      for (size_t j = 0; j < filled; j++) {
        vec2[j] = -vec1[j];
      }

      sqlite3_reset(stmtInsert);
      sqlite3_bind_int64(stmtInsert, 1, rowid);
      sqlite3_bind_blob(stmtInsert, 2, vec1, sizeof(vec1), SQLITE_TRANSIENT);
      if (sqlite3_step(stmtInsert) == SQLITE_DONE) {
        /* Only update if insert succeeded (rowid might already exist) */
        sqlite3_reset(stmtUpdate);
        sqlite3_bind_blob(stmtUpdate, 1, vec2, sizeof(vec2), SQLITE_TRANSIENT);
        sqlite3_bind_int64(stmtUpdate, 2, rowid);
        sqlite3_step(stmtUpdate);
      }
      break;
    }
    default:
      /* unreachable: op is always in 0..4 */
      break;
    }
  }

  /* Final consistency check: full scan must not crash */
  sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);

cleanup:
  sqlite3_finalize(stmtInsert);
  sqlite3_finalize(stmtUpdate);
  sqlite3_finalize(stmtDelete);
  sqlite3_finalize(stmtKnn);
  sqlite3_close(db);
  return 0;
}

View file

@ -0,0 +1,178 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/**
 * Fuzz target: deep exercise of rescore KNN with fuzz-controlled query vectors
 * and both quantizer types (bit + int8), multiple distance metrics.
 *
 * The existing rescore-operations.c only tests bit quantizer with a fixed
 * query vector. This target:
 * - Tests both bit and int8 quantizers
 * - Uses fuzz-controlled query and data vectors. Every component is a signed
 *   byte divided by a constant, so all values are finite (no NaN/Inf).
 * - Tries the default metric plus distance_metric=cosine and =l1
 * - Uses a fuzz-controlled LIMIT in 1..50 for the first query
 * - Exercises the insert->query->update->query->delete->query cycle
 *
 * Input layout:
 *   byte 0      low bit selects quantizer (1 = int8, 0 = bit)
 *   byte 1      number of inserts: (byte % 20) + 3
 *   byte 2      LIMIT of the first KNN query: (byte % 50) + 1
 *   byte 3      metric: byte % 3
 *   bytes 4-19  query vector (one byte per component)
 *   bytes 20+   inserted vectors, 16 bytes each, zero-padded when input ends
 */

/*
 * Run one KNN statement with the 16-float query bound to parameter 1 and
 * read every result row. A statement that fails to prepare is skipped.
 * `qvec` must stay valid for the duration of the call (bound SQLITE_STATIC
 * and finalized before returning).
 */
static void knn_deep_query(sqlite3 *db, const char *sql, const float qvec[16]) {
  sqlite3_stmt *knn = NULL;
  sqlite3_prepare_v2(db, sql, -1, &knn, NULL);
  if (!knn) return;
  sqlite3_bind_blob(knn, 1, qvec, (int)(16 * sizeof(float)), SQLITE_STATIC);
  while (sqlite3_step(knn) == SQLITE_ROW) {
    /* Read results to ensure distance computation didn't produce garbage
       that crashes the cursor iteration */
    (void)sqlite3_column_int64(knn, 0);
    (void)sqlite3_column_double(knn, 1);
  }
  sqlite3_finalize(knn);
}

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size < 20) return 0;

  int rc;
  sqlite3 *db;
  rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  /* Use first 4 bytes for configuration */
  uint8_t config = data[0];
  uint8_t num_inserts = (data[1] % 20) + 3; /* 3..22 inserts */
  uint8_t limit_val = (data[2] % 50) + 1;   /* 1..50 for LIMIT */
  uint8_t metric_choice = data[3] % 3;
  data += 4;
  size -= 4; /* size >= 16 from here on */

  const char *quantizer = (config & 1) ? "int8" : "bit";
  const char *metric_str;
  switch (metric_choice) {
  case 1: metric_str = " distance_metric=cosine"; break;
  case 2: metric_str = " distance_metric=l1"; break;
  case 0: /* default L2 */
  default: metric_str = ""; break;
  }

  /* Build CREATE TABLE statement.
     NOTE(review): distance_metric is appended AFTER the indexed-by clause.
     If the column parser rejects that ordering, the CREATE fails and every
     input with a non-default metric is discarded below -- confirm. */
  char create_sql[256];
  snprintf(create_sql, sizeof(create_sql),
           "CREATE VIRTUAL TABLE v USING vec0("
           "emb float[16] indexed by rescore(quantizer=%s)%s)",
           quantizer, metric_str);
  rc = sqlite3_exec(db, create_sql, NULL, NULL, NULL);
  if (rc != SQLITE_OK) {
    sqlite3_close(db);
    return 0;
  }

  /* One shared read position: the query vector takes the first 16 bytes
     (guaranteed present by the size check) and the inserts consume what
     follows. Restarting at 0 for the query would make it a scaled copy of
     the first inserted vector instead of independent fuzz data. */
  size_t cursor = 0;
  float qvec[16];
  for (int j = 0; j < 16; j++) {
    qvec[j] = (float)((int8_t)data[cursor++]) / 3.0f;
  }

  /* Insert vectors using the remaining fuzz data */
  {
    sqlite3_stmt *ins = NULL;
    sqlite3_prepare_v2(db,
        "INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &ins, NULL);
    if (!ins) {
      sqlite3_close(db);
      return 0;
    }

    for (int i = 0; i < num_inserts && cursor + 1 < size; i++) {
      float vec[16];
      for (int j = 0; j < 16; j++) {
        /* One signed byte per component; zero-pad once input runs out. */
        vec[j] = (cursor < size) ? (float)((int8_t)data[cursor++]) / 5.0f
                                 : 0.0f;
      }
      sqlite3_reset(ins);
      sqlite3_bind_int64(ins, 1, (sqlite3_int64)(i + 1));
      sqlite3_bind_blob(ins, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
      sqlite3_step(ins);
    }
    sqlite3_finalize(ins);
  }

  /* KNN query with fuzz-controlled vector and LIMIT */
  {
    char knn_sql[256];
    snprintf(knn_sql, sizeof(knn_sql),
             "SELECT rowid, distance FROM v WHERE emb MATCH ? "
             "ORDER BY distance LIMIT %d", (int)limit_val);
    knn_deep_query(db, knn_sql, qvec);
  }

  /* Update row 1 to the reversed query vector, then query again */
  {
    float uvec[16];
    for (int j = 0; j < 16; j++) uvec[j] = qvec[15 - j];

    sqlite3_stmt *upd = NULL;
    sqlite3_prepare_v2(db,
        "UPDATE v SET emb = ? WHERE rowid = 1", -1, &upd, NULL);
    if (upd) {
      /* uvec outlives upd, which is finalized inside this scope. */
      sqlite3_bind_blob(upd, 1, uvec, sizeof(uvec), SQLITE_STATIC);
      sqlite3_step(upd);
      sqlite3_finalize(upd);
    }
  }

  /* Second KNN after update */
  knn_deep_query(db,
      "SELECT rowid, distance FROM v WHERE emb MATCH ? "
      "ORDER BY distance LIMIT 10", qvec);

  /* Delete every odd rowid up to num_inserts (rowids that were never
     inserted are harmless no-ops), then KNN again */
  for (int i = 1; i <= num_inserts; i += 2) {
    char del_sql[64];
    snprintf(del_sql, sizeof(del_sql),
             "DELETE FROM v WHERE rowid = %d", i);
    sqlite3_exec(db, del_sql, NULL, NULL, NULL);
  }

  /* Third KNN after deletes */
  knn_deep_query(db,
      "SELECT rowid, distance FROM v WHERE emb MATCH ? "
      "ORDER BY distance LIMIT 5", qvec);

  sqlite3_close(db);
  return 0;
}

View file

@ -0,0 +1,96 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/**
 * Fuzz target: fuzz-driven sequence of INSERT / DELETE / KNN / full-scan
 * operations on an 8-dimensional rescore table using the bit quantizer.
 *
 * Input layout: repeated [op byte][rowid byte][up to 8 vector bytes].
 *   op    = byte % 4  (0 insert, 1 delete, 2 KNN, 3 full scan)
 *   rowid = (byte % 32) + 1
 * The vector bytes are only consumed by the insert operation.
 *
 * @return always 0
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  /* Fixed KNN query vector. Static storage so that binding it with
     SQLITE_STATIC stays valid for as long as stmtKnn keeps the binding. */
  static const float kQueryVec[8] = {1.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 0.0f};

  if (size < 6) return 0;

  int rc;
  sqlite3 *db;
  sqlite3_stmt *stmtInsert = NULL;
  sqlite3_stmt *stmtDelete = NULL;
  sqlite3_stmt *stmtKnn = NULL;
  sqlite3_stmt *stmtScan = NULL;

  rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  rc = sqlite3_exec(db,
      "CREATE VIRTUAL TABLE v USING vec0("
      "emb float[8] indexed by rescore(quantizer=bit))",
      NULL, NULL, NULL);
  if (rc != SQLITE_OK) {
    sqlite3_close(db);
    return 0;
  }

  sqlite3_prepare_v2(db,
      "INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
  sqlite3_prepare_v2(db,
      "DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
  sqlite3_prepare_v2(db,
      "SELECT rowid, distance FROM v WHERE emb MATCH ? ORDER BY distance LIMIT 3",
      -1, &stmtKnn, NULL);
  sqlite3_prepare_v2(db,
      "SELECT rowid FROM v", -1, &stmtScan, NULL);
  if (!stmtInsert || !stmtDelete || !stmtKnn || !stmtScan) goto cleanup;

  size_t i = 0;
  while (i + 2 <= size) {
    uint8_t op = data[i++] % 4;
    uint8_t rowid_byte = data[i++];
    int64_t rowid = (int64_t)(rowid_byte % 32) + 1;

    switch (op) {
    case 0: {
      /* INSERT: consume up to 8 bytes, one signed byte per component;
         components without input stay 0. */
      float vec[8] = {0.0f};
      for (int j = 0; j < 8 && i < size; j++, i++) {
        vec[j] = (float)((int8_t)data[i]) / 10.0f;
      }
      sqlite3_reset(stmtInsert);
      sqlite3_bind_int64(stmtInsert, 1, rowid);
      sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
      sqlite3_step(stmtInsert);
      break;
    }
    case 1: {
      /* DELETE */
      sqlite3_reset(stmtDelete);
      sqlite3_bind_int64(stmtDelete, 1, rowid);
      sqlite3_step(stmtDelete);
      break;
    }
    case 2: {
      /* KNN query with the fixed query vector */
      sqlite3_reset(stmtKnn);
      sqlite3_bind_blob(stmtKnn, 1, kQueryVec, sizeof(kQueryVec),
                        SQLITE_STATIC);
      while (sqlite3_step(stmtKnn) == SQLITE_ROW) {}
      break;
    }
    case 3: {
      /* Full scan */
      sqlite3_reset(stmtScan);
      while (sqlite3_step(stmtScan) == SQLITE_ROW) {}
      break;
    }
    default:
      /* unreachable: op is always in 0..3 */
      break;
    }
  }

  /* Final operations -- must not crash regardless of prior state */
  sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);

cleanup:
  sqlite3_finalize(stmtInsert);
  sqlite3_finalize(stmtDelete);
  sqlite3_finalize(stmtKnn);
  sqlite3_finalize(stmtScan);
  sqlite3_close(db);
  return 0;
}

View file

@ -0,0 +1,177 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/* Test wrappers from sqlite-vec-rescore.c (SQLITE_VEC_TEST build) */
extern void _test_rescore_quantize_float_to_bit(const float *src, uint8_t *dst, size_t dim);
extern void _test_rescore_quantize_float_to_int8(const float *src, int8_t *dst, size_t dim);
extern size_t _test_rescore_quantized_byte_size_bit(size_t dimensions);
extern size_t _test_rescore_quantized_byte_size_int8(size_t dimensions);

/**
 * Fuzz target: edge cases in rescore quantization functions.
 *
 * The existing rescore-quantize.c only uses dimensions that are multiples
 * of 8 and only range-checks the int8 output. This target:
 *
 * - Tests the quantized byte-size helpers with arbitrary dimension values
 * - Passes raw float reinterpretation of fuzz bytes (NaN, Inf, denormals,
 *   negative zero) and checks each output bit of the bit quantizer
 * - Tests the int8 quantizer with all-identical values (range=0 branch)
 * - Tests the int8 quantizer with extreme ranges (overflow in scale calc)
 *
 * Input layout: byte 0 selects the mode (byte % 5); the remaining bytes are
 * the payload for that mode. Because the payload starts one byte into the
 * fuzzer buffer it is NOT float-aligned, so float data is always copied into
 * properly aligned storage with memcpy rather than read through a cast.
 *
 * @return always 0
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size < 8) return 0;

  uint8_t mode = data[0] % 5;
  data++;
  size--; /* size >= 7 from here on */

  switch (mode) {
  case 0: {
    /* Byte-size helpers with a fuzz-controlled dimension count. Nothing is
       asserted about the values; the calls just must not crash. */
    if (size < sizeof(size_t)) return 0;
    size_t dim;
    memcpy(&dim, data, sizeof(dim));
    size_t bit_size = _test_rescore_quantized_byte_size_bit(dim);
    size_t int8_size = _test_rescore_quantized_byte_size_int8(dim);
    (void)bit_size;
    (void)int8_size;
    break;
  }
  case 1: {
    /* Bit quantize with raw reinterpreted floats (NaN, Inf, denormal).
       Expected rule checked below: value >= 0.0f sets the bit, value < 0.0f
       clears it. NaN satisfies neither comparison and is not checked. */
    size_t dim = ((size / sizeof(float)) / 8) * 8; /* multiple of 8 */
    if (dim == 0) return 0;

    float *src = (float *)malloc(dim * sizeof(float));
    uint8_t *dst = (uint8_t *)malloc(dim / 8);
    if (!src || !dst) {
      free(src);
      free(dst);
      return 0;
    }
    memcpy(src, data, dim * sizeof(float));

    _test_rescore_quantize_float_to_bit(src, dst, dim);

    for (size_t i = 0; i < dim; i++) {
      int bit_set = (dst[i / 8] >> (i % 8)) & 1;
      if (src[i] >= 0.0f) {
        assert(bit_set == 1);
      } else if (src[i] < 0.0f) {
        /* Definitely negative -- bit must be 0 */
        assert(bit_set == 0);
      }
    }
    free(src);
    free(dst);
    break;
  }
  case 2: {
    /* Int8 quantize with raw reinterpreted floats.
       The dangerous paths:
       - All values identical (range == 0)
       - vmin/vmax with NaN (NaN < anything is false, NaN > anything is false)
       - Extreme range causing scale = 255/range to be Inf or 0
       - denormals near the clamping boundaries */
    size_t num_floats = size / sizeof(float);
    if (num_floats == 0) return 0;

    float *src = (float *)malloc(num_floats * sizeof(float));
    int8_t *dst = (int8_t *)malloc(num_floats);
    if (!src || !dst) {
      free(src);
      free(dst);
      return 0;
    }
    memcpy(src, data, num_floats * sizeof(float));

    _test_rescore_quantize_float_to_int8(src, dst, num_floats);

    /* Trivially true for int8_t; the real checking is done by the
       sanitizers while the quantizer runs. */
    for (size_t i = 0; i < num_floats; i++) {
      assert(dst[i] >= -128 && dst[i] <= 127);
    }
    free(src);
    free(dst);
    break;
  }
  case 3: {
    /* Int8 quantize stress: all-same values (range=0 branch) */
    size_t dim = (size < 64) ? size : 64;
    if (dim == 0) return 0;
    float *src = (float *)malloc(dim * sizeof(float));
    int8_t *dst = (int8_t *)malloc(dim);
    if (!src || !dst) {
      free(src);
      free(dst);
      return 0;
    }

    /* Fill with a single value taken from the fuzz data; size >= 7 here so
       a full float is always available. */
    float val;
    memcpy(&val, data, sizeof(val));
    for (size_t i = 0; i < dim; i++) src[i] = val;

    _test_rescore_quantize_float_to_int8(src, dst, dim);

    /* All outputs should be 0 when range == 0. For NaN/Inf fill values
       max - min is NaN rather than 0, so the expectation does not apply;
       the quantizer is still run above so sanitizers see that path. */
    if (isfinite(val)) {
      for (size_t i = 0; i < dim; i++) {
        assert(dst[i] == 0);
      }
    }
    free(src);
    free(dst);
    break;
  }
  case 4: {
    /* Int8 quantize with extreme range: two fuzz-chosen finite floats plus
       coarse filler values. Targets overflow in the scale computation and
       the subsequent clamping. */
    if (size < 2 * sizeof(float)) return 0;
    float extreme[2];
    memcpy(extreme, data, 2 * sizeof(float));

    /* Only test if both are finite (NaN/Inf tested in case 2) */
    if (!isfinite(extreme[0]) || !isfinite(extreme[1])) return 0;

    /* Build a vector with these two extreme values plus some fuzz */
    size_t dim = 16;
    float src[16];
    src[0] = extreme[0];
    src[1] = extreme[1];
    for (size_t i = 2; i < dim; i++) {
      size_t off = 2 * sizeof(float) + (i - 2);
      src[i] = (off < size) ? (float)((int8_t)data[off]) * 1000.0f : 0.0f;
    }

    int8_t dst[16];
    _test_rescore_quantize_float_to_int8(src, dst, dim);
    for (size_t i = 0; i < dim; i++) {
      assert(dst[i] >= -128 && dst[i] <= 127);
    }
    break;
  }
  default:
    /* unreachable: mode is always in 0..4 */
    break;
  }
  return 0;
}

View file

@ -0,0 +1,54 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/* These are SQLITE_VEC_TEST wrappers defined in sqlite-vec-rescore.c */
extern void _test_rescore_quantize_float_to_bit(const float *src, uint8_t *dst, size_t dim);
extern void _test_rescore_quantize_float_to_int8(const float *src, int8_t *dst, size_t dim);

/**
 * Fuzz target: run both rescore quantizers over the fuzz input reinterpreted
 * as an array of floats.
 *
 * The dimension count is the number of whole floats in the input rounded
 * down to a multiple of 8 (required by the bit quantizer), so inputs shorter
 * than 8 floats are ignored.
 *
 * @return always 0
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  /* Whole floats available, rounded down to a multiple of 8. */
  size_t dim = ((size / sizeof(float)) / 8) * 8;
  if (dim == 0) return 0;

  /* The fuzzer buffer carries no alignment guarantee for float access, so
     copy the bytes into float storage instead of casting the pointer. */
  float *src = (float *)malloc(dim * sizeof(float));
  uint8_t *bit_dst = (uint8_t *)malloc(dim / 8);
  int8_t *int8_dst = (int8_t *)malloc(dim);
  if (!src || !bit_dst || !int8_dst) {
    free(src);
    free(bit_dst);
    free(int8_dst);
    return 0;
  }
  memcpy(src, data, dim * sizeof(float));

  /* Test bit quantization */
  _test_rescore_quantize_float_to_bit(src, bit_dst, dim);

  /* Test int8 quantization */
  _test_rescore_quantize_float_to_int8(src, int8_dst, dim);

  /* Verify int8 output is in range (trivially true for int8_t; the real
     checking is done by the sanitizers while the quantizers run) */
  for (size_t i = 0; i < dim; i++) {
    assert(int8_dst[i] >= -128 && int8_dst[i] <= 127);
  }

  free(src);
  free(bit_dst);
  free(int8_dst);
  return 0;
}

View file

@ -0,0 +1,230 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/**
 * Fuzz target: damage the rescore shadow tables of a populated vec0 table,
 * then drive every read and write path over the damaged state.
 *
 * Paths this is aimed at in rescore_knn (Phase 1 + 2):
 * - sqlite3_blob_read into baseVectors with potentially wrong-sized blobs
 * - distance computation on corrupted/partial quantized data
 * - blob_reopen on _rescore_vectors with missing/corrupted rows
 * - insert/delete after corruption (blob_write to wrong offsets)
 *
 * The existing shadow-corrupt.c only tests vec0 without rescore.
 *
 * Input layout:
 *   byte 0   low bit selects quantizer (1 = int8, 0 = bit)
 *   byte 1   corruption scenario: byte % 8
 *   byte 2+  payload written into shadow-table blobs by scenarios 0, 1, 7
 */

/*
 * Prepare `sql`, bind `blob` (when non-NULL) to parameter 1, step the
 * statement until it stops returning rows, and finalize it. A statement that
 * fails to prepare is skipped. `blob` only needs to outlive this call, which
 * is why SQLITE_STATIC is sufficient.
 */
static void shadow_corrupt_run(sqlite3 *db, const char *sql,
                               const void *blob, int blob_len) {
  sqlite3_stmt *st = NULL;
  sqlite3_prepare_v2(db, sql, -1, &st, NULL);
  if (st == NULL) return;
  if (blob != NULL) {
    sqlite3_bind_blob(st, 1, blob, blob_len, SQLITE_STATIC);
  }
  while (sqlite3_step(st) == SQLITE_ROW) {
    /* results are discarded; only crash-freedom matters */
  }
  sqlite3_finalize(st);
}

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size < 4) return 0;

  sqlite3 *db;
  int rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  /* Decode the two control bytes; everything after them is payload. */
  const int int8_quantizer = data[0] & 1;
  const int scenario = data[1] % 8;
  const uint8_t *blob = data + 2;
  const int blob_len = (int)(size - 2);

  const char *ddl;
  if (int8_quantizer) {
    ddl = "CREATE VIRTUAL TABLE v USING vec0("
          "emb float[16] indexed by rescore(quantizer=int8))";
  } else {
    ddl = "CREATE VIRTUAL TABLE v USING vec0("
          "emb float[16] indexed by rescore(quantizer=bit))";
  }
  if (sqlite3_exec(db, ddl, NULL, NULL, NULL) != SQLITE_OK) {
    sqlite3_close(db);
    return 0;
  }

  /* Seed rows 1..8 with deterministic vectors so there is data to corrupt. */
  sqlite3_stmt *seed = NULL;
  sqlite3_prepare_v2(db,
      "INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &seed, NULL);
  if (seed == NULL) {
    sqlite3_close(db);
    return 0;
  }
  for (int row = 1; row <= 8; row++) {
    float values[16];
    for (int k = 0; k < 16; k++) {
      values[k] = (float)(row * 10 + k) / 100.0f;
    }
    sqlite3_reset(seed);
    sqlite3_bind_int64(seed, 1, row);
    sqlite3_bind_blob(seed, 2, values, sizeof(values), SQLITE_TRANSIENT);
    sqlite3_step(seed);
  }
  sqlite3_finalize(seed);

  /* Apply exactly one corruption scenario. */
  switch (scenario) {
  case 0:
    /* Quantized chunk blob <- fuzz payload */
    shadow_corrupt_run(db,
        "UPDATE v_rescore_chunks00 SET vectors = ? WHERE rowid = 1",
        blob, blob_len);
    break;
  case 1:
    /* Full-precision vector of row 3 <- fuzz payload */
    shadow_corrupt_run(db,
        "UPDATE v_rescore_vectors00 SET vector = ? WHERE rowid = 3",
        blob, blob_len);
    break;
  case 2:
    /* Quantized chunk blob <- fixed 4-byte value (wrong size) */
    shadow_corrupt_run(db,
        "UPDATE v_rescore_chunks00 SET vectors = X'DEADBEEF' WHERE rowid = 1",
        NULL, 0);
    break;
  case 3:
    /* Remove some full-precision rows */
    sqlite3_exec(db,
        "DELETE FROM v_rescore_vectors00 WHERE rowid IN (2, 4, 6)",
        NULL, NULL, NULL);
    break;
  case 4:
    /* Remove the quantized chunk row entirely */
    sqlite3_exec(db,
        "DELETE FROM v_rescore_chunks00 WHERE rowid = 1",
        NULL, NULL, NULL);
    break;
  case 5:
    /* Quantized chunk blob <- NULL */
    sqlite3_exec(db,
        "UPDATE v_rescore_chunks00 SET vectors = NULL WHERE rowid = 1",
        NULL, NULL, NULL);
    break;
  case 6:
    /* Full-precision vector of row 3 <- NULL */
    sqlite3_exec(db,
        "UPDATE v_rescore_vectors00 SET vector = NULL WHERE rowid = 3",
        NULL, NULL, NULL);
    break;
  case 7: {
    /* Both tables: first half of the payload goes to the chunk blob, the
       rest to the full-precision vector of row 1. */
    const int first_half = blob_len / 2;
    shadow_corrupt_run(db,
        "UPDATE v_rescore_chunks00 SET vectors = ? WHERE rowid = 1",
        blob, first_half);
    shadow_corrupt_run(db,
        "UPDATE v_rescore_vectors00 SET vector = ? WHERE rowid = 1",
        blob + first_half, blob_len - first_half);
    break;
  }
  }

  /* From here on: exercise ALL read/write paths -- NONE should crash. */

  /* KNN query (triggers rescore_knn Phase 1 + Phase 2) */
  const float identity_like[16] = {1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1};
  shadow_corrupt_run(db,
      "SELECT rowid, distance FROM v WHERE emb MATCH ? "
      "ORDER BY distance LIMIT 5",
      identity_like, (int)sizeof(identity_like));

  /* Full scan, then point lookups */
  sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);
  sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 1", NULL, NULL, NULL);
  sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 3", NULL, NULL, NULL);

  /* Insert after corruption */
  const float zeros[16] = {0};
  shadow_corrupt_run(db,
      "INSERT INTO v(rowid, emb) VALUES (99, ?)",
      zeros, (int)sizeof(zeros));

  /* Delete after corruption */
  sqlite3_exec(db, "DELETE FROM v WHERE rowid = 5", NULL, NULL, NULL);

  /* Update after corruption */
  const float ones[16] = {1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1};
  shadow_corrupt_run(db,
      "UPDATE v SET emb = ? WHERE rowid = 1",
      ones, (int)sizeof(ones));

  /* KNN again after modifications to corrupted state */
  const float upper_half[16] = {0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1};
  shadow_corrupt_run(db,
      "SELECT rowid, distance FROM v WHERE emb MATCH ? "
      "ORDER BY distance LIMIT 3",
      upper_half, (int)sizeof(upper_half));

  sqlite3_exec(db, "DROP TABLE v", NULL, NULL, NULL);
  sqlite3_close(db);
  return 0;
}