#include #include #include #include #include #include "sqlite-vec.h" #include "sqlite3.h" #include /** * Fuzz target: deep exercise of rescore KNN with fuzz-controlled query vectors * and both quantizer types (bit + int8), multiple distance metrics. * * The existing rescore-operations.c only tests bit quantizer with a fixed * query vector. This target: * - Tests both bit and int8 quantizers * - Uses fuzz-controlled query vectors (hits NaN/Inf/denormal paths) * - Tests all distance metrics with int8 (L2, cosine, L1) * - Exercises large LIMIT values (oversample multiplication) * - Tests KNN with rowid IN constraints * - Exercises the insert->query->update->query->delete->query cycle */ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size < 20) return 0; int rc; sqlite3 *db; rc = sqlite3_open(":memory:", &db); assert(rc == SQLITE_OK); rc = sqlite3_vec_init(db, NULL, NULL); assert(rc == SQLITE_OK); /* Use first 4 bytes for configuration */ uint8_t config = data[0]; uint8_t num_inserts = (data[1] % 20) + 3; /* 3..22 inserts */ uint8_t limit_val = (data[2] % 50) + 1; /* 1..50 for LIMIT */ uint8_t metric_choice = data[3] % 3; data += 4; size -= 4; int use_int8 = config & 1; const char *metric_str; switch (metric_choice) { case 0: metric_str = ""; break; /* default L2 */ case 1: metric_str = " distance_metric=cosine"; break; case 2: metric_str = " distance_metric=l1"; break; default: metric_str = ""; break; } /* Build CREATE TABLE statement */ char create_sql[256]; if (use_int8) { snprintf(create_sql, sizeof(create_sql), "CREATE VIRTUAL TABLE v USING vec0(" "emb float[16] indexed by rescore(quantizer=int8)%s)", metric_str); } else { /* bit quantizer ignores distance_metric for the coarse pass (always hamming), but the float rescore phase uses the specified metric */ snprintf(create_sql, sizeof(create_sql), "CREATE VIRTUAL TABLE v USING vec0(" "emb float[16] indexed by rescore(quantizer=bit)%s)", metric_str); } rc = sqlite3_exec(db, create_sql, NULL, NULL, NULL); if (rc != SQLITE_OK) { sqlite3_close(db); return 0; } /* Insert vectors using fuzz data */ { sqlite3_stmt *ins = NULL; sqlite3_prepare_v2(db, "INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &ins, NULL); if (!ins) { sqlite3_close(db); return 0; } size_t cursor = 0; for (int i = 0; i < num_inserts && cursor + 1 < size; i++) { float vec[16]; for (int j = 0; j < 16; j++) { if (cursor < size) { /* Map fuzz byte to float -- includes potential for interesting float values via reinterpretation */ int8_t sb = (int8_t)data[cursor++]; vec[j] = (float)sb / 5.0f; } else { vec[j] = 0.0f; } } sqlite3_reset(ins); sqlite3_bind_int64(ins, 1, (sqlite3_int64)(i + 1)); sqlite3_bind_blob(ins, 2, vec, sizeof(vec), SQLITE_TRANSIENT); sqlite3_step(ins); } sqlite3_finalize(ins); } /* Build a fuzz-controlled query vector from remaining data */ float qvec[16] = {0}; { size_t cursor = 0; for (int j = 0; j < 16 && cursor < size; j++) { int8_t sb = (int8_t)data[cursor++]; qvec[j] = (float)sb / 3.0f; } } /* KNN query with fuzz-controlled vector and LIMIT */ { char knn_sql[256]; snprintf(knn_sql, sizeof(knn_sql), "SELECT rowid, distance FROM v WHERE emb MATCH ? " "ORDER BY distance LIMIT %d", (int)limit_val); sqlite3_stmt *knn = NULL; sqlite3_prepare_v2(db, knn_sql, -1, &knn, NULL); if (knn) { sqlite3_bind_blob(knn, 1, qvec, sizeof(qvec), SQLITE_STATIC); while (sqlite3_step(knn) == SQLITE_ROW) { /* Read results to ensure distance computation didn't produce garbage that crashes the cursor iteration */ (void)sqlite3_column_int64(knn, 0); (void)sqlite3_column_double(knn, 1); } sqlite3_finalize(knn); } } /* Update some vectors, then query again */ { float uvec[16]; for (int j = 0; j < 16; j++) uvec[j] = qvec[15 - j]; /* reverse of query */ sqlite3_stmt *upd = NULL; sqlite3_prepare_v2(db, "UPDATE v SET emb = ? WHERE rowid = 1", -1, &upd, NULL); if (upd) { sqlite3_bind_blob(upd, 1, uvec, sizeof(uvec), SQLITE_STATIC); sqlite3_step(upd); sqlite3_finalize(upd); } } /* Second KNN after update */ { sqlite3_stmt *knn = NULL; sqlite3_prepare_v2(db, "SELECT rowid, distance FROM v WHERE emb MATCH ? " "ORDER BY distance LIMIT 10", -1, &knn, NULL); if (knn) { sqlite3_bind_blob(knn, 1, qvec, sizeof(qvec), SQLITE_STATIC); while (sqlite3_step(knn) == SQLITE_ROW) {} sqlite3_finalize(knn); } } /* Delete half the rows, then KNN again */ for (int i = 1; i <= num_inserts; i += 2) { char del_sql[64]; snprintf(del_sql, sizeof(del_sql), "DELETE FROM v WHERE rowid = %d", i); sqlite3_exec(db, del_sql, NULL, NULL, NULL); } /* Third KNN after deletes -- exercises distance computation over zeroed-out slots in the quantized chunk */ { sqlite3_stmt *knn = NULL; sqlite3_prepare_v2(db, "SELECT rowid, distance FROM v WHERE emb MATCH ? " "ORDER BY distance LIMIT 5", -1, &knn, NULL); if (knn) { sqlite3_bind_blob(knn, 1, qvec, sizeof(qvec), SQLITE_STATIC); while (sqlite3_step(knn) == SQLITE_ROW) {} sqlite3_finalize(knn); } } sqlite3_close(db); return 0; }