mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Add FTS5-style command column and runtime oversample for rescore
Replace the old INSERT INTO t(rowid) VALUES('command') hack with a
proper hidden command column named after the table (FTS5 pattern):
INSERT INTO t(t) VALUES ('oversample=16')
The command column is the first hidden column (before distance and k),
which keeps open the option of using it as a table-valued function argument in the future.
Schema: CREATE TABLE x(rowid, <cols>, "<table>" hidden, distance hidden, k hidden)
For backwards compatibility, tables created before v0.1.10 (detected via
the version recorded in the _info shadow table) skip the command column, to avoid name conflicts with
user columns that may share the table's name. Verified against a legacy
fixture DB generated by sqlite-vec v0.1.6.
Changes:
- Add hidden command column to sqlite3_declare_vtab for new tables
- Version-gate via _info shadow table for existing tables
- Validate at CREATE time that no column name matches table name
- Add rescore_handle_command() with oversample=N support
- rescore_knn() prefers runtime oversample_search over CREATE default
- Remove old rowid-based command dispatch
- Migrate all DiskANN/IVF/fuzz tests and benchmarks to new syntax
- Add legacy DB fixture (v0.1.6) and 9 backwards-compat tests
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b7fc459be4
commit
6e2c4c6bab
21 changed files with 512 additions and 105 deletions
|
|
@ -50,7 +50,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
{
|
||||
sqlite3_stmt *stmt;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmt, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmt, NULL);
|
||||
for (int i = 1; i <= 8; i++) {
|
||||
float vec[8];
|
||||
for (int j = 0; j < 8; j++) vec[j] = (float)i * 0.1f + (float)j * 0.01f;
|
||||
|
|
@ -66,11 +66,11 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_stmt *stmtKnn = NULL;
|
||||
|
||||
/* Commands are dispatched via INSERT INTO t(rowid) VALUES ('cmd_string') */
|
||||
/* Commands are dispatched via INSERT INTO t(t) VALUES ('cmd_string') */
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid) VALUES (?)", -1, &stmtCmd, NULL);
|
||||
"INSERT INTO v(v) VALUES (?)", -1, &stmtCmd, NULL);
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
sqlite3_prepare_v2(db,
|
||||
"SELECT rowid, distance FROM v WHERE emb MATCH ? AND k = ?",
|
||||
-1, &stmtKnn, NULL);
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
// Insert enough vectors to overflow at least one cell
|
||||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
if (!stmtInsert) { sqlite3_close(db); return 0; }
|
||||
|
||||
size_t offset = 0;
|
||||
|
|
@ -81,7 +81,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Train to assign vectors to centroids (triggers cell building)
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Delete vectors at boundary positions based on fuzz data
|
||||
|
|
@ -102,7 +102,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
{
|
||||
sqlite3_stmt *si = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
if (si) {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
float *vec = sqlite3_malloc(dim * sizeof(float));
|
||||
|
|
@ -140,7 +140,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
// Test assign-vectors with multi-cell state
|
||||
// First clear centroids
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('clear-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('clear-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Set centroids manually, then assign
|
||||
|
|
@ -151,7 +151,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
char cmd[128];
|
||||
snprintf(cmd, sizeof(cmd),
|
||||
"INSERT INTO v(rowid, emb) VALUES ('set-centroid:%d', ?)", c);
|
||||
"INSERT INTO v(v, emb) VALUES ('set-centroid:%d', ?)", c);
|
||||
sqlite3_stmt *sc = NULL;
|
||||
sqlite3_prepare_v2(db, cmd, -1, &sc, NULL);
|
||||
if (sc) {
|
||||
|
|
@ -163,7 +163,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
}
|
||||
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('assign-vectors')",
|
||||
"INSERT INTO v(v) VALUES ('assign-vectors')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Final query after assign-vectors
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
// Insert vectors
|
||||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
if (!stmtInsert) { sqlite3_close(db); return 0; }
|
||||
|
||||
size_t offset = 0;
|
||||
|
|
@ -125,14 +125,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Clear centroids and re-compute to test round-trip
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('clear-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('clear-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Insert a few more vectors in untrained state
|
||||
{
|
||||
sqlite3_stmt *si = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
if (si) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
float *vec = sqlite3_malloc(dim * sizeof(float));
|
||||
|
|
@ -150,7 +150,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Re-train
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Delete some rows after training, then query
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
// Insert vectors
|
||||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
if (!stmtInsert) { sqlite3_close(db); return 0; }
|
||||
|
||||
size_t offset = 0;
|
||||
|
|
@ -134,14 +134,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Train
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Change nprobe at runtime (can exceed nlist -- tests clamping in query)
|
||||
{
|
||||
char cmd[64];
|
||||
snprintf(cmd, sizeof(cmd),
|
||||
"INSERT INTO v(rowid) VALUES ('nprobe=%d')", nprobe_initial);
|
||||
"INSERT INTO v(v) VALUES ('nprobe=%d')", nprobe_initial);
|
||||
sqlite3_exec(db, cmd, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
|
||||
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
sqlite3_prepare_v2(db,
|
||||
"DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
|
||||
sqlite3_prepare_v2(db,
|
||||
|
|
@ -82,14 +82,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
case 4: {
|
||||
// compute-centroids command
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
// clear-centroids command
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('clear-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('clear-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
break;
|
||||
}
|
||||
|
|
@ -100,7 +100,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
int nprobe = (n % 4) + 1;
|
||||
char buf[64];
|
||||
snprintf(buf, sizeof(buf),
|
||||
"INSERT INTO v(rowid) VALUES ('nprobe=%d')", nprobe);
|
||||
"INSERT INTO v(v) VALUES ('nprobe=%d')", nprobe);
|
||||
sqlite3_exec(db, buf, NULL, NULL, NULL);
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
// Insert vectors with fuzz-controlled float values
|
||||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
if (!stmtInsert) { sqlite3_close(db); return 0; }
|
||||
|
||||
size_t offset = 0;
|
||||
|
|
@ -93,7 +93,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Trigger compute-centroids to exercise kmeans + quantization together
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// KNN query with fuzz-derived query vector
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
// Insert vectors with diverse values
|
||||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
if (!stmtInsert) { sqlite3_close(db); return 0; }
|
||||
|
||||
size_t offset = 0;
|
||||
|
|
@ -103,7 +103,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Train
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Multiple KNN queries to exercise rescore path
|
||||
|
|
@ -156,7 +156,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Retrain after deletions
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Query after retrain
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
{
|
||||
sqlite3_stmt *si = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
if (!si) { sqlite3_close(db); return 0; }
|
||||
for (int i = 0; i < 10; i++) {
|
||||
float vec[8];
|
||||
|
|
@ -63,7 +63,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// Train
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// Now corrupt shadow tables based on fuzz input
|
||||
|
|
@ -204,7 +204,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
float newvec[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
sqlite3_stmt *si = NULL;
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
|
||||
if (si) {
|
||||
sqlite3_bind_int64(si, 1, 100);
|
||||
sqlite3_bind_blob(si, 2, newvec, sizeof(newvec), SQLITE_STATIC);
|
||||
|
|
@ -215,12 +215,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||
|
||||
// compute-centroids over corrupted state
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('compute-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
// clear-centroids
|
||||
sqlite3_exec(db,
|
||||
"INSERT INTO v(rowid) VALUES ('clear-centroids')",
|
||||
"INSERT INTO v(v) VALUES ('clear-centroids')",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
sqlite3_close(db);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue