Add FTS5-style command column and runtime oversample for rescore

Replace the old INSERT INTO t(rowid) VALUES('command') hack with a
proper hidden command column named after the table (FTS5 pattern):

  INSERT INTO t(t) VALUES ('oversample=16')

The command column is the first hidden column (before distance and k),
which preserves the option of using it as a table-valued function
argument in the future.

Schema: CREATE TABLE x(rowid, <cols>, "<table>" hidden, distance hidden, k hidden)

For backwards compatibility, pre-v0.1.10 tables (detected via the version
stored in the _info shadow table) skip the command column to avoid name
conflicts with user columns that may share the table's name. Verified
with a legacy fixture DB generated by sqlite-vec v0.1.6.

Changes:
- Add hidden command column to sqlite3_declare_vtab for new tables
- Version-gate via _info shadow table for existing tables
- Validate at CREATE time that no column name matches table name
- Add rescore_handle_command() with oversample=N support
- rescore_knn() prefers runtime oversample_search over CREATE default
- Remove old rowid-based command dispatch
- Migrate all DiskANN/IVF/fuzz tests and benchmarks to new syntax
- Add legacy DB fixture (v0.1.6) and 9 backwards-compat tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-31 22:39:18 -07:00
parent b7fc459be4
commit 6e2c4c6bab
21 changed files with 512 additions and 105 deletions

View file

@ -46,7 +46,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
{
sqlite3_stmt *si = NULL;
sqlite3_prepare_v2(db,
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
if (!si) { sqlite3_close(db); return 0; }
for (int i = 0; i < 10; i++) {
float vec[8];
@ -63,7 +63,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// Train
sqlite3_exec(db,
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
"INSERT INTO v(v) VALUES ('compute-centroids')",
NULL, NULL, NULL);
// Now corrupt shadow tables based on fuzz input
@ -204,7 +204,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
float newvec[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
sqlite3_stmt *si = NULL;
sqlite3_prepare_v2(db,
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
if (si) {
sqlite3_bind_int64(si, 1, 100);
sqlite3_bind_blob(si, 2, newvec, sizeof(newvec), SQLITE_STATIC);
@ -215,12 +215,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// compute-centroids over corrupted state
sqlite3_exec(db,
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
"INSERT INTO v(v) VALUES ('compute-centroids')",
NULL, NULL, NULL);
// clear-centroids
sqlite3_exec(db,
"INSERT INTO v(rowid) VALUES ('clear-centroids')",
"INSERT INTO v(v) VALUES ('clear-centroids')",
NULL, NULL, NULL);
sqlite3_close(db);