mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Add vec0 optimize command: compact sparse chunks after deletions
Implements FTS5-style INSERT INTO v(v) VALUES ('optimize') command that
packs live entries from newer/sparser chunks into free slots of older
chunks, then deletes emptied chunks. Adds hidden command column to vtab
schema, command dispatcher in xUpdate, and two-pointer compaction
algorithm that handles vectors, all metadata types, and partitioned tables.
Includes 16 Python tests, 7 C unit tests, and a libFuzzer target.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
56707c4c09
commit
ce3fdec86d
5 changed files with 1358 additions and 2 deletions
512
sqlite-vec.c
512
sqlite-vec.c
|
|
@ -3409,6 +3409,7 @@ static sqlite3_module vec_npy_eachModule = {
|
||||||
#define VEC0_COLUMN_USERN_START 1
|
#define VEC0_COLUMN_USERN_START 1
|
||||||
#define VEC0_COLUMN_OFFSET_DISTANCE 1
|
#define VEC0_COLUMN_OFFSET_DISTANCE 1
|
||||||
#define VEC0_COLUMN_OFFSET_K 2
|
#define VEC0_COLUMN_OFFSET_K 2
|
||||||
|
#define VEC0_COLUMN_OFFSET_CMD 3
|
||||||
|
|
||||||
#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""
|
#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""
|
||||||
|
|
||||||
|
|
@ -3685,6 +3686,16 @@ int vec0_column_k_idx(vec0_vtab *p) {
|
||||||
VEC0_COLUMN_OFFSET_K;
|
VEC0_COLUMN_OFFSET_K;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Returns the column index for the hidden command column.
|
||||||
|
* This column shares the table name and is used for FTS5-style insert commands
|
||||||
|
* like: INSERT INTO t(t) VALUES ('optimize');
|
||||||
|
*/
|
||||||
|
int vec0_column_cmd_idx(vec0_vtab *p) {
|
||||||
|
return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
|
||||||
|
VEC0_COLUMN_OFFSET_CMD;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns 1 if the given column-based index is a valid vector column,
|
* Returns 1 if the given column-based index is a valid vector column,
|
||||||
* 0 otherwise.
|
* 0 otherwise.
|
||||||
|
|
@ -4961,7 +4972,7 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
sqlite3_str_appendall(createStr, " distance hidden, k hidden) ");
|
sqlite3_str_appendf(createStr, " distance hidden, k hidden, \"%w\" hidden) ", argv[2]);
|
||||||
if (pkColumnName) {
|
if (pkColumnName) {
|
||||||
sqlite3_str_appendall(createStr, "without rowid ");
|
sqlite3_str_appendall(createStr, "without rowid ");
|
||||||
}
|
}
|
||||||
|
|
@ -8305,11 +8316,32 @@ int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid,
|
||||||
*
|
*
|
||||||
* @return int SQLITE_OK on success, otherwise error code on failure
|
* @return int SQLITE_OK on success, otherwise error code on failure
|
||||||
*/
|
*/
|
||||||
|
static int vec0_optimize(vec0_vtab *p);
|
||||||
|
|
||||||
|
/**
 * Dispatch an FTS5-style command insert, i.e.
 *   INSERT INTO t(t) VALUES ('<command>');
 *
 * The only recognised command is "optimize", matched case-insensitively.
 *
 * @param pVTab    virtual table the command was issued against
 * @param cmdValue value supplied for the hidden command column
 * @return the result of vec0_optimize() for "optimize"; SQLITE_NOMEM when the
 *         command text cannot be obtained; SQLITE_ERROR (with the vtab error
 *         message set) for any other command
 */
static int vec0Update_InsertCommand(sqlite3_vtab *pVTab, sqlite3_value *cmdValue) {
  const char *zCmd = (const char *)sqlite3_value_text(cmdValue);
  if (!zCmd) {
    // sqlite3_value_text() returns NULL for an SQL NULL or when the text
    // conversion fails to allocate. Report that as out-of-memory rather than
    // as an "unknown command" with an empty name.
    return SQLITE_NOMEM;
  }

  if (sqlite3_stricmp(zCmd, "optimize") == 0) {
    return vec0_optimize((vec0_vtab *)pVTab);
  }

  vtab_set_error(pVTab, "Unknown vec0 command: \"%s\"", zCmd);
  return SQLITE_ERROR;
}
|
||||||
|
|
||||||
int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
|
int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
|
||||||
sqlite_int64 *pRowid) {
|
sqlite_int64 *pRowid) {
|
||||||
UNUSED_PARAMETER(argc);
|
UNUSED_PARAMETER(argc);
|
||||||
vec0_vtab *p = (vec0_vtab *)pVTab;
|
vec0_vtab *p = (vec0_vtab *)pVTab;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
|
// Check for FTS5-style insert commands: INSERT INTO t(t) VALUES ('cmd')
|
||||||
|
{
|
||||||
|
int cmd_argv_idx = 2 + vec0_column_cmd_idx(p);
|
||||||
|
if (cmd_argv_idx < argc &&
|
||||||
|
sqlite3_value_type(argv[cmd_argv_idx]) == SQLITE_TEXT) {
|
||||||
|
return vec0Update_InsertCommand(pVTab, argv[cmd_argv_idx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Rowid for the inserted row, determined by the inserted ID + _rowids shadow
|
// Rowid for the inserted row, determined by the inserted ID + _rowids shadow
|
||||||
// table
|
// table
|
||||||
i64 rowid;
|
i64 rowid;
|
||||||
|
|
@ -9008,6 +9040,484 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// vec0 optimize: pack live entries into older chunks, delete empty ones
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Information about a single chunk loaded during optimize.
|
||||||
|
*/
|
||||||
|
struct vec0_optimize_chunk {
|
||||||
|
i64 chunk_id;
|
||||||
|
int validity_size; // bytes in validity bitmap
|
||||||
|
unsigned char *validity; // in-memory validity bitmap (owned)
|
||||||
|
int rowids_size; // bytes in rowids blob
|
||||||
|
i64 *rowids; // in-memory rowids array (owned)
|
||||||
|
int modified; // 1 if validity/rowids were changed and need flush
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Move one entry from (src_chunk, src_offset) to (dst_chunk, dst_offset).
|
||||||
|
* Copies vector data, metadata data, updates rowids position.
|
||||||
|
* In-memory validity/rowids are updated in the caller.
|
||||||
|
*/
|
||||||
|
static int vec0_optimize_move_entry(
|
||||||
|
vec0_vtab *p,
|
||||||
|
struct vec0_optimize_chunk *src, i64 src_offset,
|
||||||
|
struct vec0_optimize_chunk *dst, i64 dst_offset) {
|
||||||
|
int rc;
|
||||||
|
i64 rowid = src->rowids[src_offset];
|
||||||
|
|
||||||
|
// 1. Move vector data for each vector column
|
||||||
|
for (int i = 0; i < p->numVectorColumns; i++) {
|
||||||
|
size_t vec_size = vector_column_byte_size(p->vector_columns[i]);
|
||||||
|
void *buf = sqlite3_malloc(vec_size);
|
||||||
|
if (!buf) return SQLITE_NOMEM;
|
||||||
|
|
||||||
|
// Read from source
|
||||||
|
sqlite3_blob *blob = NULL;
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
|
||||||
|
"vectors", src->chunk_id, 1, &blob);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(buf); return rc; }
|
||||||
|
rc = sqlite3_blob_read(blob, buf, vec_size, src_offset * vec_size);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_blob_close(blob); sqlite3_free(buf); return rc; }
|
||||||
|
// Zero the source slot
|
||||||
|
void *zeros = sqlite3_malloc(vec_size);
|
||||||
|
if (!zeros) { sqlite3_blob_close(blob); sqlite3_free(buf); return SQLITE_NOMEM; }
|
||||||
|
memset(zeros, 0, vec_size);
|
||||||
|
rc = sqlite3_blob_write(blob, zeros, vec_size, src_offset * vec_size);
|
||||||
|
sqlite3_free(zeros);
|
||||||
|
sqlite3_blob_close(blob);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(buf); return rc; }
|
||||||
|
|
||||||
|
// Write to destination
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
|
||||||
|
"vectors", dst->chunk_id, 1, &blob);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(buf); return rc; }
|
||||||
|
rc = sqlite3_blob_write(blob, buf, vec_size, dst_offset * vec_size);
|
||||||
|
sqlite3_blob_close(blob);
|
||||||
|
sqlite3_free(buf);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Move metadata for each metadata column
|
||||||
|
for (int i = 0; i < p->numMetadataColumns; i++) {
|
||||||
|
vec0_metadata_column_kind kind = p->metadata_columns[i].kind;
|
||||||
|
|
||||||
|
if (kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
|
||||||
|
// Boolean: bit-level copy
|
||||||
|
sqlite3_blob *srcBlob = NULL, *dstBlob = NULL;
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName,
|
||||||
|
p->shadowMetadataChunksNames[i], "data",
|
||||||
|
src->chunk_id, 1, &srcBlob);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
|
||||||
|
int blobSize = sqlite3_blob_bytes(srcBlob);
|
||||||
|
unsigned char *srcBuf = sqlite3_malloc(blobSize);
|
||||||
|
if (!srcBuf) { sqlite3_blob_close(srcBlob); return SQLITE_NOMEM; }
|
||||||
|
rc = sqlite3_blob_read(srcBlob, srcBuf, blobSize, 0);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(srcBuf); sqlite3_blob_close(srcBlob); return rc; }
|
||||||
|
|
||||||
|
int srcBit = bitmap_get(srcBuf, src_offset);
|
||||||
|
// Clear source bit
|
||||||
|
bitmap_set(srcBuf, src_offset, 0);
|
||||||
|
rc = sqlite3_blob_write(srcBlob, srcBuf, blobSize, 0);
|
||||||
|
sqlite3_blob_close(srcBlob);
|
||||||
|
sqlite3_free(srcBuf);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
|
||||||
|
// Set destination bit
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName,
|
||||||
|
p->shadowMetadataChunksNames[i], "data",
|
||||||
|
dst->chunk_id, 1, &dstBlob);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
|
||||||
|
blobSize = sqlite3_blob_bytes(dstBlob);
|
||||||
|
unsigned char *dstBuf = sqlite3_malloc(blobSize);
|
||||||
|
if (!dstBuf) { sqlite3_blob_close(dstBlob); return SQLITE_NOMEM; }
|
||||||
|
rc = sqlite3_blob_read(dstBlob, dstBuf, blobSize, 0);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(dstBuf); sqlite3_blob_close(dstBlob); return rc; }
|
||||||
|
|
||||||
|
bitmap_set(dstBuf, dst_offset, srcBit);
|
||||||
|
rc = sqlite3_blob_write(dstBlob, dstBuf, blobSize, 0);
|
||||||
|
sqlite3_blob_close(dstBlob);
|
||||||
|
sqlite3_free(dstBuf);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Integer, float, text view: fixed-size per slot
|
||||||
|
int slot_size;
|
||||||
|
switch (kind) {
|
||||||
|
case VEC0_METADATA_COLUMN_KIND_INTEGER: slot_size = sizeof(i64); break;
|
||||||
|
case VEC0_METADATA_COLUMN_KIND_FLOAT: slot_size = sizeof(double); break;
|
||||||
|
case VEC0_METADATA_COLUMN_KIND_TEXT: slot_size = VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH; break;
|
||||||
|
default: return SQLITE_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *buf = sqlite3_malloc(slot_size);
|
||||||
|
if (!buf) return SQLITE_NOMEM;
|
||||||
|
|
||||||
|
// Read from source
|
||||||
|
sqlite3_blob *blob = NULL;
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName,
|
||||||
|
p->shadowMetadataChunksNames[i], "data",
|
||||||
|
src->chunk_id, 1, &blob);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(buf); return rc; }
|
||||||
|
rc = sqlite3_blob_read(blob, buf, slot_size, src_offset * slot_size);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_blob_close(blob); sqlite3_free(buf); return rc; }
|
||||||
|
// Zero source slot
|
||||||
|
void *zeros = sqlite3_malloc(slot_size);
|
||||||
|
if (!zeros) { sqlite3_blob_close(blob); sqlite3_free(buf); return SQLITE_NOMEM; }
|
||||||
|
memset(zeros, 0, slot_size);
|
||||||
|
rc = sqlite3_blob_write(blob, zeros, slot_size, src_offset * slot_size);
|
||||||
|
sqlite3_free(zeros);
|
||||||
|
sqlite3_blob_close(blob);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(buf); return rc; }
|
||||||
|
|
||||||
|
// Write to destination
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName,
|
||||||
|
p->shadowMetadataChunksNames[i], "data",
|
||||||
|
dst->chunk_id, 1, &blob);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_free(buf); return rc; }
|
||||||
|
rc = sqlite3_blob_write(blob, buf, slot_size, dst_offset * slot_size);
|
||||||
|
sqlite3_blob_close(blob);
|
||||||
|
sqlite3_free(buf);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Update in-memory validity and rowids
|
||||||
|
bitmap_set(src->validity, src_offset, 0);
|
||||||
|
bitmap_set(dst->validity, dst_offset, 1);
|
||||||
|
src->rowids[src_offset] = 0;
|
||||||
|
dst->rowids[dst_offset] = rowid;
|
||||||
|
src->modified = 1;
|
||||||
|
dst->modified = 1;
|
||||||
|
|
||||||
|
// 4. Update _rowids table position
|
||||||
|
rc = vec0_rowids_update_position(p, rowid, dst->chunk_id, dst_offset);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete a chunk and all its associated shadow table data.
|
||||||
|
* Does NOT check if it's empty — caller must ensure that.
|
||||||
|
*/
|
||||||
|
static int vec0_optimize_delete_chunk(vec0_vtab *p, i64 chunk_id) {
|
||||||
|
int rc;
|
||||||
|
char *zSql;
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
|
||||||
|
// Delete from _chunks
|
||||||
|
zSql = sqlite3_mprintf(
|
||||||
|
"DELETE FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE chunk_id = ?",
|
||||||
|
p->schemaName, p->tableName);
|
||||||
|
if (!zSql) return SQLITE_NOMEM;
|
||||||
|
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
|
||||||
|
sqlite3_free(zSql);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
sqlite3_bind_int64(stmt, 1, chunk_id);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
if (rc != SQLITE_DONE) return SQLITE_ERROR;
|
||||||
|
|
||||||
|
// Delete from each _vector_chunksNN
|
||||||
|
for (int i = 0; i < p->numVectorColumns; i++) {
|
||||||
|
zSql = sqlite3_mprintf(
|
||||||
|
"DELETE FROM " VEC0_SHADOW_VECTOR_N_NAME " WHERE rowid = ?",
|
||||||
|
p->schemaName, p->tableName, i);
|
||||||
|
if (!zSql) return SQLITE_NOMEM;
|
||||||
|
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
|
||||||
|
sqlite3_free(zSql);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
sqlite3_bind_int64(stmt, 1, chunk_id);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
if (rc != SQLITE_DONE) return SQLITE_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete from each _metadatachunksNN
|
||||||
|
for (int i = 0; i < p->numMetadataColumns; i++) {
|
||||||
|
zSql = sqlite3_mprintf(
|
||||||
|
"DELETE FROM " VEC0_SHADOW_METADATA_N_NAME " WHERE rowid = ?",
|
||||||
|
p->schemaName, p->tableName, i);
|
||||||
|
if (!zSql) return SQLITE_NOMEM;
|
||||||
|
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
|
||||||
|
sqlite3_free(zSql);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
sqlite3_bind_int64(stmt, 1, chunk_id);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
if (rc != SQLITE_DONE) return SQLITE_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return SQLITE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush modified in-memory validity and rowids blobs back to the DB.
|
||||||
|
*/
|
||||||
|
static int vec0_optimize_flush_chunk(vec0_vtab *p, struct vec0_optimize_chunk *c) {
|
||||||
|
int rc;
|
||||||
|
sqlite3_blob *blob = NULL;
|
||||||
|
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
|
||||||
|
c->chunk_id, 1, &blob);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
rc = sqlite3_blob_write(blob, c->validity, c->validity_size, 0);
|
||||||
|
sqlite3_blob_close(blob);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
|
||||||
|
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
|
||||||
|
c->chunk_id, 1, &blob);
|
||||||
|
if (rc != SQLITE_OK) return rc;
|
||||||
|
rc = sqlite3_blob_write(blob, c->rowids, c->rowids_size, 0);
|
||||||
|
sqlite3_blob_close(blob);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optimize one partition: compact live entries from newer chunks into
|
||||||
|
* older chunks, then delete any emptied chunks.
|
||||||
|
*/
|
||||||
|
static int vec0_optimize_one_partition(vec0_vtab *p, sqlite3_stmt *stmtChunks) {
|
||||||
|
int rc = SQLITE_OK;
|
||||||
|
int nChunks = 0;
|
||||||
|
int nAlloced = 0;
|
||||||
|
struct vec0_optimize_chunk *chunks = NULL;
|
||||||
|
|
||||||
|
// Step 1: Load all chunks for this partition into memory
|
||||||
|
while ((rc = sqlite3_step(stmtChunks)) == SQLITE_ROW) {
|
||||||
|
if (nChunks >= nAlloced) {
|
||||||
|
nAlloced = nAlloced ? nAlloced * 2 : 8;
|
||||||
|
struct vec0_optimize_chunk *tmp = sqlite3_realloc(chunks, nAlloced * sizeof(*chunks));
|
||||||
|
if (!tmp) { rc = SQLITE_NOMEM; goto cleanup; }
|
||||||
|
chunks = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct vec0_optimize_chunk *c = &chunks[nChunks];
|
||||||
|
memset(c, 0, sizeof(*c));
|
||||||
|
c->chunk_id = sqlite3_column_int64(stmtChunks, 0);
|
||||||
|
c->modified = 0;
|
||||||
|
|
||||||
|
// Read validity blob
|
||||||
|
const void *vBlob = sqlite3_column_blob(stmtChunks, 1);
|
||||||
|
c->validity_size = sqlite3_column_bytes(stmtChunks, 1);
|
||||||
|
c->validity = sqlite3_malloc(c->validity_size);
|
||||||
|
if (!c->validity) { rc = SQLITE_NOMEM; goto cleanup; }
|
||||||
|
memcpy(c->validity, vBlob, c->validity_size);
|
||||||
|
|
||||||
|
// Read rowids blob
|
||||||
|
const void *rBlob = sqlite3_column_blob(stmtChunks, 2);
|
||||||
|
c->rowids_size = sqlite3_column_bytes(stmtChunks, 2);
|
||||||
|
c->rowids = sqlite3_malloc(c->rowids_size);
|
||||||
|
if (!c->rowids) { rc = SQLITE_NOMEM; goto cleanup; }
|
||||||
|
memcpy(c->rowids, rBlob, c->rowids_size);
|
||||||
|
|
||||||
|
nChunks++;
|
||||||
|
}
|
||||||
|
if (rc != SQLITE_DONE) goto cleanup;
|
||||||
|
rc = SQLITE_OK;
|
||||||
|
|
||||||
|
// Nothing to compact with 0 or 1 chunks
|
||||||
|
if (nChunks <= 1) goto cleanup;
|
||||||
|
|
||||||
|
// Step 2: Two-pointer compaction
|
||||||
|
{
|
||||||
|
int left = 0; // index of target chunk (oldest with free space)
|
||||||
|
int right = nChunks - 1; // index of source chunk (newest)
|
||||||
|
int left_free = -1; // next free slot in left chunk
|
||||||
|
int right_live = -1; // next live slot in right chunk (scan from end)
|
||||||
|
|
||||||
|
// Find first free slot in left chunk
|
||||||
|
for (int i = 0; i < p->chunk_size; i++) {
|
||||||
|
if (!bitmap_get(chunks[left].validity, i)) { left_free = i; break; }
|
||||||
|
}
|
||||||
|
// If left chunk is full, advance
|
||||||
|
while (left < right && left_free < 0) {
|
||||||
|
left++;
|
||||||
|
for (int i = 0; i < p->chunk_size && left < right; i++) {
|
||||||
|
if (!bitmap_get(chunks[left].validity, i)) { left_free = i; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find last live slot in right chunk (scan backwards for efficiency)
|
||||||
|
for (int i = p->chunk_size - 1; i >= 0; i--) {
|
||||||
|
if (bitmap_get(chunks[right].validity, i)) { right_live = i; break; }
|
||||||
|
}
|
||||||
|
// If right chunk is empty, retreat
|
||||||
|
while (left < right && right_live < 0) {
|
||||||
|
right--;
|
||||||
|
for (int i = p->chunk_size - 1; i >= 0; i--) {
|
||||||
|
if (bitmap_get(chunks[right].validity, i)) { right_live = i; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (left < right) {
|
||||||
|
// Move entry from right to left
|
||||||
|
rc = vec0_optimize_move_entry(p,
|
||||||
|
&chunks[right], right_live,
|
||||||
|
&chunks[left], left_free);
|
||||||
|
if (rc != SQLITE_OK) goto cleanup;
|
||||||
|
|
||||||
|
// Advance left_free to next free slot in current left chunk
|
||||||
|
{
|
||||||
|
int prev = left_free;
|
||||||
|
left_free = -1;
|
||||||
|
for (int i = prev + 1; i < p->chunk_size; i++) {
|
||||||
|
if (!bitmap_get(chunks[left].validity, i)) { left_free = i; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If left chunk is now full, advance to next chunk
|
||||||
|
while (left < right && left_free < 0) {
|
||||||
|
left++;
|
||||||
|
if (left >= right) break;
|
||||||
|
for (int i = 0; i < p->chunk_size; i++) {
|
||||||
|
if (!bitmap_get(chunks[left].validity, i)) { left_free = i; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retreat right_live to previous live slot in current right chunk
|
||||||
|
{
|
||||||
|
int prev = right_live;
|
||||||
|
right_live = -1;
|
||||||
|
for (int i = prev - 1; i >= 0; i--) {
|
||||||
|
if (bitmap_get(chunks[right].validity, i)) { right_live = i; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If right chunk is now empty, retreat to previous chunk
|
||||||
|
while (left < right && right_live < 0) {
|
||||||
|
right--;
|
||||||
|
if (left >= right) break;
|
||||||
|
for (int i = p->chunk_size - 1; i >= 0; i--) {
|
||||||
|
if (bitmap_get(chunks[right].validity, i)) { right_live = i; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Flush modified chunks, delete empty ones
|
||||||
|
for (int i = 0; i < nChunks; i++) {
|
||||||
|
// Check if chunk is now empty
|
||||||
|
int allZero = 1;
|
||||||
|
for (int j = 0; j < chunks[i].validity_size; j++) {
|
||||||
|
if (chunks[i].validity[j] != 0) { allZero = 0; break; }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allZero) {
|
||||||
|
rc = vec0_optimize_delete_chunk(p, chunks[i].chunk_id);
|
||||||
|
if (rc != SQLITE_OK) goto cleanup;
|
||||||
|
} else if (chunks[i].modified) {
|
||||||
|
rc = vec0_optimize_flush_chunk(p, &chunks[i]);
|
||||||
|
if (rc != SQLITE_OK) goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (chunks) {
|
||||||
|
for (int i = 0; i < nChunks; i++) {
|
||||||
|
sqlite3_free(chunks[i].validity);
|
||||||
|
sqlite3_free(chunks[i].rowids);
|
||||||
|
}
|
||||||
|
sqlite3_free(chunks);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Top-level optimize: wraps everything in a savepoint, iterates partitions.
|
||||||
|
*/
|
||||||
|
static int vec0_optimize(vec0_vtab *p) {
|
||||||
|
int rc;
|
||||||
|
char *zSql;
|
||||||
|
sqlite3_stmt *stmt = NULL;
|
||||||
|
|
||||||
|
// Free cached statements that may hold references to shadow tables
|
||||||
|
if (p->stmtLatestChunk) {
|
||||||
|
sqlite3_finalize(p->stmtLatestChunk);
|
||||||
|
p->stmtLatestChunk = NULL;
|
||||||
|
}
|
||||||
|
if (p->stmtRowidsUpdatePosition) {
|
||||||
|
sqlite3_finalize(p->stmtRowidsUpdatePosition);
|
||||||
|
p->stmtRowidsUpdatePosition = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p->numPartitionColumns == 0) {
|
||||||
|
// No partitions: single pass over all chunks
|
||||||
|
zSql = sqlite3_mprintf(
|
||||||
|
"SELECT chunk_id, validity, rowids FROM " VEC0_SHADOW_CHUNKS_NAME
|
||||||
|
" ORDER BY chunk_id ASC",
|
||||||
|
p->schemaName, p->tableName);
|
||||||
|
if (!zSql) { rc = SQLITE_NOMEM; goto done; }
|
||||||
|
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
|
||||||
|
sqlite3_free(zSql);
|
||||||
|
if (rc != SQLITE_OK) goto done;
|
||||||
|
|
||||||
|
rc = vec0_optimize_one_partition(p, stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
stmt = NULL;
|
||||||
|
if (rc != SQLITE_OK) goto done;
|
||||||
|
} else {
|
||||||
|
// Partitioned: get distinct partition values, then optimize each
|
||||||
|
sqlite3_str *s = sqlite3_str_new(NULL);
|
||||||
|
sqlite3_str_appendf(s, "SELECT DISTINCT ");
|
||||||
|
for (int i = 0; i < p->numPartitionColumns; i++) {
|
||||||
|
if (i > 0) sqlite3_str_appendall(s, ", ");
|
||||||
|
sqlite3_str_appendf(s, "partition%02d", i);
|
||||||
|
}
|
||||||
|
sqlite3_str_appendf(s, " FROM " VEC0_SHADOW_CHUNKS_NAME,
|
||||||
|
p->schemaName, p->tableName);
|
||||||
|
zSql = sqlite3_str_finish(s);
|
||||||
|
if (!zSql) { rc = SQLITE_NOMEM; goto done; }
|
||||||
|
|
||||||
|
sqlite3_stmt *stmtPartitions = NULL;
|
||||||
|
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmtPartitions, NULL);
|
||||||
|
sqlite3_free(zSql);
|
||||||
|
if (rc != SQLITE_OK) goto done;
|
||||||
|
|
||||||
|
while ((rc = sqlite3_step(stmtPartitions)) == SQLITE_ROW) {
|
||||||
|
// Build query for this partition's chunks
|
||||||
|
sqlite3_str *cs = sqlite3_str_new(NULL);
|
||||||
|
sqlite3_str_appendf(cs,
|
||||||
|
"SELECT chunk_id, validity, rowids FROM " VEC0_SHADOW_CHUNKS_NAME
|
||||||
|
" WHERE ",
|
||||||
|
p->schemaName, p->tableName);
|
||||||
|
for (int i = 0; i < p->numPartitionColumns; i++) {
|
||||||
|
if (i > 0) sqlite3_str_appendall(cs, " AND ");
|
||||||
|
sqlite3_str_appendf(cs, "partition%02d = ?", i);
|
||||||
|
}
|
||||||
|
sqlite3_str_appendall(cs, " ORDER BY chunk_id ASC");
|
||||||
|
char *zChunkSql = sqlite3_str_finish(cs);
|
||||||
|
if (!zChunkSql) { sqlite3_finalize(stmtPartitions); rc = SQLITE_NOMEM; goto done; }
|
||||||
|
|
||||||
|
sqlite3_stmt *stmtChunks = NULL;
|
||||||
|
rc = sqlite3_prepare_v2(p->db, zChunkSql, -1, &stmtChunks, NULL);
|
||||||
|
sqlite3_free(zChunkSql);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_finalize(stmtPartitions); goto done; }
|
||||||
|
|
||||||
|
for (int i = 0; i < p->numPartitionColumns; i++) {
|
||||||
|
sqlite3_bind_value(stmtChunks, i + 1, sqlite3_column_value(stmtPartitions, i));
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = vec0_optimize_one_partition(p, stmtChunks);
|
||||||
|
sqlite3_finalize(stmtChunks);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_finalize(stmtPartitions); goto done; }
|
||||||
|
}
|
||||||
|
sqlite3_finalize(stmtPartitions);
|
||||||
|
if (rc != SQLITE_DONE) goto done;
|
||||||
|
rc = SQLITE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
// Invalidate stmtLatestChunk since chunks may have been deleted
|
||||||
|
if (p->stmtLatestChunk) {
|
||||||
|
sqlite3_finalize(p->stmtLatestChunk);
|
||||||
|
p->stmtLatestChunk = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) {
|
int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) {
|
||||||
int rc;
|
int rc;
|
||||||
sqlite3_stmt *stmt;
|
sqlite3_stmt *stmt;
|
||||||
|
|
|
||||||
|
|
@ -72,10 +72,13 @@ $(TARGET_DIR)/vec_mismatch: vec-mismatch.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
$(TARGET_DIR)/vec0_delete_completeness: vec0-delete-completeness.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
$(TARGET_DIR)/vec0_delete_completeness: vec0-delete-completeness.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
|
$(TARGET_DIR)/vec0_optimize: vec0-optimize.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
FUZZ_TARGETS = vec0_create exec json numpy \
|
FUZZ_TARGETS = vec0_create exec json numpy \
|
||||||
shadow_corrupt vec0_operations scalar_functions \
|
shadow_corrupt vec0_operations scalar_functions \
|
||||||
vec0_create_full metadata_columns vec_each vec_mismatch \
|
vec0_create_full metadata_columns vec_each vec_mismatch \
|
||||||
vec0_delete_completeness
|
vec0_delete_completeness vec0_optimize
|
||||||
|
|
||||||
all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS))
|
all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS))
|
||||||
|
|
||||||
|
|
|
||||||
140
tests/fuzz/vec0-optimize.c
Normal file
140
tests/fuzz/vec0-optimize.c
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fuzz target for the vec0 optimize command.
|
||||||
|
* Performs random INSERT/DELETE operations, then runs optimize,
|
||||||
|
* and asserts that all remaining rows are still queryable and
|
||||||
|
* the virtual table is in a consistent state.
|
||||||
|
*/
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
if (size < 4) return 0;
|
||||||
|
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
sqlite3_stmt *stmtInsert = NULL;
|
||||||
|
sqlite3_stmt *stmtDelete = NULL;
|
||||||
|
sqlite3_stmt *stmtScan = NULL;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
rc = sqlite3_exec(db,
|
||||||
|
"CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=4)",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
|
||||||
|
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"SELECT rowid, emb FROM v", -1, &stmtScan, NULL);
|
||||||
|
|
||||||
|
if (!stmtInsert || !stmtDelete || !stmtScan) goto cleanup;
|
||||||
|
|
||||||
|
/* Track which rowids are live */
|
||||||
|
uint8_t live[16];
|
||||||
|
memset(live, 0, sizeof(live));
|
||||||
|
|
||||||
|
size_t i = 0;
|
||||||
|
while (i + 2 <= size - 2) { /* reserve 2 bytes for optimize trigger */
|
||||||
|
uint8_t op = data[i++] % 3;
|
||||||
|
uint8_t rowid_byte = data[i++];
|
||||||
|
int64_t rowid = (int64_t)(rowid_byte % 16) + 1;
|
||||||
|
|
||||||
|
switch (op) {
|
||||||
|
case 0: {
|
||||||
|
/* INSERT */
|
||||||
|
float vec[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
for (int j = 0; j < 4 && i < size - 2; j++, i++) {
|
||||||
|
vec[j] = (float)((int8_t)data[i]) / 10.0f;
|
||||||
|
}
|
||||||
|
sqlite3_reset(stmtInsert);
|
||||||
|
sqlite3_bind_int64(stmtInsert, 1, rowid);
|
||||||
|
sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
|
||||||
|
rc = sqlite3_step(stmtInsert);
|
||||||
|
if (rc == SQLITE_DONE) {
|
||||||
|
live[rowid - 1] = 1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1: {
|
||||||
|
/* DELETE */
|
||||||
|
sqlite3_reset(stmtDelete);
|
||||||
|
sqlite3_bind_int64(stmtDelete, 1, rowid);
|
||||||
|
rc = sqlite3_step(stmtDelete);
|
||||||
|
if (rc == SQLITE_DONE) {
|
||||||
|
live[rowid - 1] = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 2: {
|
||||||
|
/* Full scan */
|
||||||
|
sqlite3_reset(stmtScan);
|
||||||
|
while (sqlite3_step(stmtScan) == SQLITE_ROW) {}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run optimize */
|
||||||
|
rc = sqlite3_exec(db, "INSERT INTO v(v) VALUES ('optimize')", NULL, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
/* Verify: all live rows are still queryable */
|
||||||
|
int expected_count = 0;
|
||||||
|
for (int j = 0; j < 16; j++) {
|
||||||
|
if (live[j]) expected_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_stmt *stmtCount = NULL;
|
||||||
|
sqlite3_prepare_v2(db, "SELECT count(*) FROM v", -1, &stmtCount, NULL);
|
||||||
|
if (stmtCount) {
|
||||||
|
rc = sqlite3_step(stmtCount);
|
||||||
|
assert(rc == SQLITE_ROW);
|
||||||
|
int actual_count = sqlite3_column_int(stmtCount, 0);
|
||||||
|
assert(actual_count == expected_count);
|
||||||
|
sqlite3_finalize(stmtCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify each live row is accessible via point query */
|
||||||
|
sqlite3_stmt *stmtPoint = NULL;
|
||||||
|
sqlite3_prepare_v2(db, "SELECT emb FROM v WHERE rowid = ?", -1, &stmtPoint, NULL);
|
||||||
|
if (stmtPoint) {
|
||||||
|
for (int j = 0; j < 16; j++) {
|
||||||
|
if (!live[j]) continue;
|
||||||
|
sqlite3_reset(stmtPoint);
|
||||||
|
sqlite3_bind_int64(stmtPoint, 1, j + 1);
|
||||||
|
rc = sqlite3_step(stmtPoint);
|
||||||
|
assert(rc == SQLITE_ROW);
|
||||||
|
assert(sqlite3_column_bytes(stmtPoint, 0) == 16);
|
||||||
|
}
|
||||||
|
sqlite3_finalize(stmtPoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify shadow table consistency: _rowids count matches live count */
|
||||||
|
sqlite3_stmt *stmtRowids = NULL;
|
||||||
|
sqlite3_prepare_v2(db, "SELECT count(*) FROM v_rowids", -1, &stmtRowids, NULL);
|
||||||
|
if (stmtRowids) {
|
||||||
|
rc = sqlite3_step(stmtRowids);
|
||||||
|
assert(rc == SQLITE_ROW);
|
||||||
|
assert(sqlite3_column_int(stmtRowids, 0) == expected_count);
|
||||||
|
sqlite3_finalize(stmtRowids);
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
sqlite3_finalize(stmtInsert);
|
||||||
|
sqlite3_finalize(stmtDelete);
|
||||||
|
sqlite3_finalize(stmtScan);
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
450
tests/test-optimize.py
Normal file
450
tests/test-optimize.py
Normal file
|
|
@ -0,0 +1,450 @@
|
||||||
|
import sqlite3
|
||||||
|
import struct
|
||||||
|
import pytest
|
||||||
|
from helpers import _f32, _i64, _int8, exec
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_basic(db):
    """Partial compaction: 16 rows in 2 chunks, delete 6, optimize keeps 2 chunks.

    After deleting rows 1-6 there are 10 live rows, which cannot fit in a
    single chunk of 8, so the chunk count must still be 2 after optimize.
    Every surviving row must remain readable with its original vector.
    """
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    for i in range(1, 17):
        db.execute(
            "insert into v(rowid, emb) values (?, ?)",
            [i, _f32([float(i)] * 4)],
        )

    assert db.execute("select count(*) from v_chunks").fetchone()[0] == 2

    # Delete rows 1-6, which leaves 6 free slots in the first chunk.
    for i in range(1, 7):
        db.execute("delete from v where rowid = ?", [i])

    # 10 live rows remain (rows 7-16).
    assert db.execute("select count(*) from v").fetchone()[0] == 10

    db.execute("insert into v(v) values ('optimize')")

    # Only 6 entries can move into the first chunk's free slots; the second
    # chunk keeps 2 live entries, so neither chunk can be dropped.
    assert db.execute("select count(*) from v_chunks").fetchone()[0] == 2

    # All 10 rows are still returned by a full scan, in rowid order...
    rows = db.execute("select rowid from v order by rowid").fetchall()
    assert [r[0] for r in rows] == list(range(7, 17))

    # ...and each one is reachable by point query with an unchanged vector.
    for i in range(7, 17):
        row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
        assert row[0] == _f32([float(i)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_full_compaction(db):
    """Three chunks of 8 collapse to one chunk once only rows 1-4 survive."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # Small local shortcut: run a query and return the first column of row one.
    scalar = lambda sql: db.execute(sql).fetchone()[0]

    for rowid in range(1, 25):
        vector = _f32([float(rowid)] * 4)
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, vector])
    assert scalar("select count(*) from v_chunks") == 3

    # Rows 1-4 stay; rows 5-24 are removed.
    for rowid in range(5, 25):
        db.execute("delete from v where rowid = ?", [rowid])
    assert scalar("select count(*) from v") == 4

    db.execute("insert into v(v) values ('optimize')")

    # Both the chunk index and the vector shadow table shrink to one row.
    assert scalar("select count(*) from v_chunks") == 1
    assert scalar("select count(*) from v_vector_chunks00") == 1

    # The four survivors keep their vectors.
    for rowid in range(1, 5):
        fetched = db.execute(
            "select emb from v where rowid = ?", [rowid]
        ).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_noop_clean_table(db):
    """A single completely full chunk is left untouched by optimize."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    rowids = range(1, 9)
    for rowid in rowids:
        payload = [rowid, _f32([float(rowid)] * 4)]
        db.execute("insert into v(rowid, emb) values (?, ?)", payload)

    db.execute("insert into v(v) values ('optimize')")

    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 1

    # Every row is still readable and unchanged.
    for rowid in rowids:
        fetched = db.execute(
            "select emb from v where rowid = ?", [rowid]
        ).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_empty_table(db):
    """Running optimize on a table with no rows succeeds and creates no chunks."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
    db.execute("insert into v(v) values ('optimize')")

    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_knn_still_works(db):
    """A k=1 KNN query after optimize still finds the true nearest row."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    for rowid in range(1, 17):
        vector = _f32([float(rowid)] * 4)
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, vector])

    # Remove rows 1-6 so that optimize has free slots to fill.
    for rowid in range(1, 7):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    # Row 7 holds exactly [7,7,7,7], so it must be the single nearest match.
    query = _f32([7.0] * 4)
    matches = db.execute(
        "select rowid, distance from v where emb match ? and k = 1",
        [query],
    ).fetchall()

    assert len(matches) == 1
    nearest = matches[0]
    assert nearest[0] == 7
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_fullscan_still_works(db):
    """A full table scan after optimize returns every live row with its vector."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    for rowid in range(1, 17):
        vector = _f32([float(rowid)] * 4)
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, vector])

    for rowid in range(1, 7):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    scanned = db.execute("select rowid, emb from v order by rowid").fetchall()
    assert len(scanned) == 10

    # Each vector was built from its rowid, so it can be re-derived here.
    for record in scanned:
        rowid, emb = record[0], record[1]
        assert emb == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_partitioned(db):
    """Two fragmented partitions are compacted without losing any rows."""
    db.execute(
        "create virtual table v using vec0("
        "part text partition key, emb float[4], chunk_size=8"
        ")"
    )

    # Partition A gets rowids 1-16, partition B gets rowids 17-32
    # (16 rows each, i.e. two chunks of 8 per partition).
    for rowid in range(1, 17):
        db.execute(
            "insert into v(rowid, part, emb) values (?, 'A', ?)",
            [rowid, _f32([float(rowid)] * 4)],
        )
    for rowid in range(17, 33):
        db.execute(
            "insert into v(rowid, part, emb) values (?, 'B', ?)",
            [rowid, _f32([float(rowid)] * 4)],
        )

    chunks_before = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunks_before == 4

    # Drop the first 7 rows of each partition.
    for rowid in range(1, 8):
        db.execute("delete from v where rowid = ?", [rowid])
    for rowid in range(17, 24):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    # 9 live rows per partition still need two chunks of 8 each, so the
    # total stays at 4.
    chunks_after = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunks_after == 4

    # Survivors of partition A, then of partition B.
    for rowid in range(8, 17):
        fetched = db.execute(
            "select emb from v where rowid = ?", [rowid]
        ).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
    for rowid in range(24, 33):
        fetched = db.execute(
            "select emb from v where rowid = ?", [rowid]
        ).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_with_metadata(db):
    """Boolean, integer, float and short-text metadata survive optimize."""
    db.execute(
        "create virtual table v using vec0("
        "emb float[4], "
        "m_bool boolean, "
        "m_int integer, "
        "m_float float, "
        "m_text text, "
        "chunk_size=8"
        ")"
    )

    # Every metadata value is derived from the rowid so it can be recomputed.
    for rowid in range(1, 17):
        params = [
            rowid,
            _f32([float(rowid)] * 4),
            rowid % 2 == 0,
            rowid * 10,
            float(rowid) / 2.0,
            f"t{rowid}",
        ]
        db.execute(
            "insert into v(rowid, emb, m_bool, m_int, m_float, m_text) "
            "values (?, ?, ?, ?, ?, ?)",
            params,
        )

    for rowid in range(1, 7):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    # Recompute the expected metadata for each survivor and compare.
    for rowid in range(7, 17):
        record = db.execute(
            "select m_bool, m_int, m_float, m_text from v where rowid = ?", [rowid]
        ).fetchone()
        expected_bool = 1 if rowid % 2 == 0 else 0
        assert record[0] == expected_bool, f"bool mismatch at rowid {rowid}"
        assert record[1] == rowid * 10, f"int mismatch at rowid {rowid}"
        assert abs(record[2] - float(rowid) / 2.0) < 1e-6, f"float mismatch at rowid {rowid}"
        assert record[3] == f"t{rowid}", f"text mismatch at rowid {rowid}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_with_auxiliary(db):
    """Auxiliary column values remain readable by rowid after optimize."""
    db.execute(
        "create virtual table v using vec0("
        "emb float[4], +aux_text text, chunk_size=8"
        ")"
    )

    for rowid in range(1, 17):
        params = [rowid, _f32([float(rowid)] * 4), f"aux_{rowid}"]
        db.execute("insert into v(rowid, emb, aux_text) values (?, ?, ?)", params)

    for rowid in range(1, 7):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    for rowid in range(7, 17):
        record = db.execute(
            "select aux_text from v where rowid = ?", [rowid]
        ).fetchone()
        assert record[0] == f"aux_{rowid}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_text_pk(db):
    """Lookups by text primary key still resolve after optimize."""
    db.execute(
        "create virtual table v using vec0("
        "id text primary key, emb float[4], chunk_size=8"
        ")"
    )

    for n in range(1, 17):
        key = f"doc_{n}"
        db.execute("insert into v(id, emb) values (?, ?)", [key, _f32([float(n)] * 4)])

    for n in range(1, 7):
        key = f"doc_{n}"
        db.execute("delete from v where id = ?", [key])

    db.execute("insert into v(v) values ('optimize')")

    # The surviving keys doc_7..doc_16 must still map to their own vectors.
    for n in range(7, 17):
        key = f"doc_{n}"
        record = db.execute("select emb from v where id = ?", [key]).fetchone()
        assert record[0] == _f32([float(n)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def _file_db(tmp_path):
    """Create a file-backed connection under ``tmp_path`` with vec0 loaded.

    A real file is needed so that page_count can shrink after VACUUM.
    Extension loading is switched on only for the duration of the load call.
    """
    db_file = tmp_path / "test.db"
    conn = sqlite3.connect(str(db_file))
    conn.row_factory = sqlite3.Row

    conn.enable_load_extension(True)
    conn.load_extension("dist/vec0")
    conn.enable_load_extension(False)

    return conn
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_disk_space_reclaimed(tmp_path):
    """PRAGMA page_count decreases after optimize + VACUUM.

    Uses a file-backed database with 256-dimensional vectors. The connection
    is closed in a ``finally`` block so a failing assertion does not leave
    the database file open.
    """
    dims = 256
    db = _file_db(tmp_path)
    try:
        db.execute(
            f"create virtual table v using vec0(emb float[{dims}], chunk_size=8)"
        )

        for i in range(1, 25):  # 3 full chunks of 8
            db.execute(
                "insert into v(rowid, emb) values (?, ?)",
                [i, _f32([float(i)] * dims)],
            )
        db.commit()
        pages_before = db.execute("pragma page_count").fetchone()[0]

        # Delete 20 of 24 rows (leaving 4 live)
        for i in range(5, 25):
            db.execute("delete from v where rowid = ?", [i])

        db.execute("insert into v(v) values ('optimize')")
        # Commit before VACUUM, which cannot run inside an open transaction.
        db.commit()

        db.execute("vacuum")
        pages_after = db.execute("pragma page_count").fetchone()[0]
        assert pages_after < pages_before, (
            f"page_count should shrink after optimize+vacuum: "
            f"{pages_before} -> {pages_after}"
        )

        # Remaining rows still work
        for i in range(1, 5):
            row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
            assert row[0] == _f32([float(i)] * dims)
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_unknown_command(db):
    """An unrecognised command string is rejected with an 'unknown' error."""
    # The table must exist first; otherwise the statement fails for an
    # unrelated reason and the command dispatcher is never exercised.
    db.execute("create virtual table v2 using vec0(emb float[4], chunk_size=8)")

    result = exec(db, "insert into v2(v2) values ('bogus')")

    assert "error" in result
    message = result["message"]
    assert "Unknown" in message or "unknown" in message
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_insert_after(db):
    """Rows inserted after optimize coexist with the rows that survived it."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    insert_sql = "insert into v(rowid, emb) values (?, ?)"
    select_sql = "select emb from v where rowid = ?"

    for rowid in range(1, 17):
        db.execute(insert_sql, [rowid, _f32([float(rowid)] * 4)])

    for rowid in range(1, 7):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    # Fresh rows added once compaction is finished.
    for rowid in range(100, 108):
        db.execute(insert_sql, [rowid, _f32([float(rowid)] * 4)])

    # First the pre-optimize survivors, then the newly inserted rows.
    for rowid in range(7, 17):
        fetched = db.execute(select_sql, [rowid]).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
    for rowid in range(100, 108):
        fetched = db.execute(select_sql, [rowid]).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_multiple_moves_from_same_chunk(db):
    """Several live entries sharing one source chunk are all relocated."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    # 24 rows fill three chunks of 8.
    for rowid in range(1, 25):
        vector = _f32([float(rowid)] * 4)
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, vector])

    # Rows 1-8: the whole first chunk becomes free.
    for rowid in range(1, 9):
        db.execute("delete from v where rowid = ?", [rowid])

    # Rows 9-12: half of the second chunk becomes free.
    for rowid in range(9, 13):
        db.execute("delete from v where rowid = ?", [rowid])

    # 12 rows remain live (rows 13-24).
    live_count = db.execute("select count(*) from v").fetchone()[0]
    assert live_count == 12

    db.execute("insert into v(v) values ('optimize')")

    # 12 rows fit into two chunks, so one chunk must have been removed.
    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 2

    # Every survivor is still reachable with an unchanged vector.
    for rowid in range(13, 25):
        fetched = db.execute(
            "select emb from v where rowid = ?", [rowid]
        ).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_scattered_deletes(db):
    """Deleting every other row leaves scattered holes that optimize compacts."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    for rowid in range(1, 25):
        vector = _f32([float(rowid)] * 4)
        db.execute("insert into v(rowid, emb) values (?, ?)", [rowid, vector])

    # Remove the even rowids 2, 4, ..., 24.
    even_rowids = range(2, 25, 2)
    for rowid in even_rowids:
        db.execute("delete from v where rowid = ?", [rowid])

    # The 12 odd rows remain, spread over all three chunks.
    live_count = db.execute("select count(*) from v").fetchone()[0]
    assert live_count == 12

    db.execute("insert into v(v) values ('optimize')")

    # 12 rows fit into two chunks of 8.
    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 2

    odd_rowids = range(1, 25, 2)
    for rowid in odd_rowids:
        fetched = db.execute(
            "select emb from v where rowid = ?", [rowid]
        ).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimize_with_long_text_metadata(db):
    """Long text metadata values are preserved by optimize."""
    db.execute(
        "create virtual table v using vec0("
        "emb float[4], m_text text, chunk_size=8"
        ")"
    )

    # 100 characters: longer than 12 chars, so stored in the overflow table.
    prefix = "x" * 100

    for rowid in range(1, 17):
        params = [rowid, _f32([float(rowid)] * 4), f"{prefix}_{rowid}"]
        db.execute("insert into v(rowid, emb, m_text) values (?, ?, ?)", params)

    for rowid in range(1, 7):
        db.execute("delete from v where rowid = ?", [rowid])

    db.execute("insert into v(v) values ('optimize')")

    for rowid in range(7, 17):
        record = db.execute(
            "select m_text from v where rowid = ?", [rowid]
        ).fetchone()
        assert record[0] == f"{prefix}_{rowid}"
|
||||||
|
|
@ -659,6 +659,252 @@ void test_distance_hamming() {
|
||||||
printf(" All distance_hamming tests passed.\n");
|
printf(" All distance_hamming tests passed.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper: create an in-memory DB with vec0 loaded
|
||||||
|
static sqlite3 *test_db_open(void) {
|
||||||
|
sqlite3 *db;
|
||||||
|
int rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
return db;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: execute SQL, assert success
|
||||||
|
static void test_exec(sqlite3 *db, const char *sql) {
|
||||||
|
char *errmsg = NULL;
|
||||||
|
int rc = sqlite3_exec(db, sql, NULL, NULL, &errmsg);
|
||||||
|
if (rc != SQLITE_OK) {
|
||||||
|
fprintf(stderr, "SQL error: %s\n SQL: %s\n", errmsg ? errmsg : "(null)", sql);
|
||||||
|
sqlite3_free(errmsg);
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: execute SQL, return integer from first column of first row
|
||||||
|
static int test_exec_int(sqlite3 *db, const char *sql) {
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
int rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
assert(rc == SQLITE_ROW);
|
||||||
|
int val = sqlite3_column_int(stmt, 0);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: insert a float[4] vector with given rowid
|
||||||
|
static void test_insert_f4(sqlite3 *db, int64_t rowid, float v0, float v1, float v2, float v3) {
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
int rc = sqlite3_prepare_v2(db,
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmt, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
float vec[4] = {v0, v1, v2, v3};
|
||||||
|
sqlite3_bind_int64(stmt, 1, rowid);
|
||||||
|
sqlite3_bind_blob(stmt, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
assert(rc == SQLITE_DONE);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: verify a float[4] vector at given rowid
|
||||||
|
static void test_verify_f4(sqlite3 *db, int64_t rowid, float v0, float v1, float v2, float v3) {
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
int rc = sqlite3_prepare_v2(db,
|
||||||
|
"SELECT emb FROM v WHERE rowid = ?", -1, &stmt, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
sqlite3_bind_int64(stmt, 1, rowid);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
assert(rc == SQLITE_ROW);
|
||||||
|
const float *blob = sqlite3_column_blob(stmt, 0);
|
||||||
|
assert(blob != NULL);
|
||||||
|
assert(sqlite3_column_bytes(stmt, 0) == 16);
|
||||||
|
float eps = 1e-6f;
|
||||||
|
assert(fabsf(blob[0] - v0) < eps);
|
||||||
|
assert(fabsf(blob[1] - v1) < eps);
|
||||||
|
assert(fabsf(blob[2] - v2) < eps);
|
||||||
|
assert(fabsf(blob[3] - v3) < eps);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_basic(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
// Insert 16 rows (2 chunks)
|
||||||
|
for (int i = 1; i <= 16; i++) {
|
||||||
|
test_insert_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v_chunks") == 2);
|
||||||
|
|
||||||
|
// Delete first 6 rows
|
||||||
|
for (int i = 1; i <= 6; i++) {
|
||||||
|
char sql[64];
|
||||||
|
snprintf(sql, sizeof(sql), "DELETE FROM v WHERE rowid = %d", i);
|
||||||
|
test_exec(db, sql);
|
||||||
|
}
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v") == 10);
|
||||||
|
|
||||||
|
// Optimize
|
||||||
|
test_exec(db, "INSERT INTO v(v) VALUES ('optimize')");
|
||||||
|
|
||||||
|
// All remaining rows still queryable
|
||||||
|
for (int i = 7; i <= 16; i++) {
|
||||||
|
test_verify_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_full_compaction(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
for (int i = 1; i <= 24; i++) {
|
||||||
|
test_insert_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v_chunks") == 3);
|
||||||
|
|
||||||
|
// Keep 1-4, delete 5-24
|
||||||
|
for (int i = 5; i <= 24; i++) {
|
||||||
|
char sql[64];
|
||||||
|
snprintf(sql, sizeof(sql), "DELETE FROM v WHERE rowid = %d", i);
|
||||||
|
test_exec(db, sql);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_exec(db, "INSERT INTO v(v) VALUES ('optimize')");
|
||||||
|
|
||||||
|
// Should compact to 1 chunk
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v_chunks") == 1);
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v_vector_chunks00") == 1);
|
||||||
|
|
||||||
|
for (int i = 1; i <= 4; i++) {
|
||||||
|
test_verify_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_empty_table(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
// Optimize on empty table — should be no-op
|
||||||
|
test_exec(db, "INSERT INTO v(v) VALUES ('optimize')");
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v_chunks") == 0);
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_noop_full_chunk(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
for (int i = 1; i <= 8; i++) {
|
||||||
|
test_insert_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Single full chunk — optimize is no-op
|
||||||
|
test_exec(db, "INSERT INTO v(v) VALUES ('optimize')");
|
||||||
|
assert(test_exec_int(db, "SELECT count(*) FROM v_chunks") == 1);
|
||||||
|
|
||||||
|
for (int i = 1; i <= 8; i++) {
|
||||||
|
test_verify_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_knn_after(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
for (int i = 1; i <= 16; i++) {
|
||||||
|
test_insert_f4(db, i, (float)i, 0, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 1; i <= 6; i++) {
|
||||||
|
char sql[64];
|
||||||
|
snprintf(sql, sizeof(sql), "DELETE FROM v WHERE rowid = %d", i);
|
||||||
|
test_exec(db, sql);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_exec(db, "INSERT INTO v(v) VALUES ('optimize')");
|
||||||
|
|
||||||
|
// KNN: find vector closest to [7,0,0,0]
|
||||||
|
sqlite3_stmt *stmt;
|
||||||
|
float query[4] = {7.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
int rc = sqlite3_prepare_v2(db,
|
||||||
|
"SELECT rowid FROM v WHERE emb MATCH ? AND k = 1", -1, &stmt, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
sqlite3_bind_blob(stmt, 1, query, sizeof(query), SQLITE_TRANSIENT);
|
||||||
|
rc = sqlite3_step(stmt);
|
||||||
|
assert(rc == SQLITE_ROW);
|
||||||
|
assert(sqlite3_column_int64(stmt, 0) == 7);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_insert_after(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
for (int i = 1; i <= 16; i++) {
|
||||||
|
test_insert_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 1; i <= 6; i++) {
|
||||||
|
char sql[64];
|
||||||
|
snprintf(sql, sizeof(sql), "DELETE FROM v WHERE rowid = %d", i);
|
||||||
|
test_exec(db, sql);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_exec(db, "INSERT INTO v(v) VALUES ('optimize')");
|
||||||
|
|
||||||
|
// Insert new rows after optimize
|
||||||
|
for (int i = 100; i < 108; i++) {
|
||||||
|
test_insert_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Both old and new rows queryable
|
||||||
|
for (int i = 7; i <= 16; i++) {
|
||||||
|
test_verify_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
for (int i = 100; i < 108; i++) {
|
||||||
|
test_verify_f4(db, i, (float)i, (float)i, (float)i, (float)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_optimize_unknown_command(void) {
|
||||||
|
printf("Starting %s...\n", __func__);
|
||||||
|
sqlite3 *db = test_db_open();
|
||||||
|
test_exec(db, "CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=8)");
|
||||||
|
|
||||||
|
char *errmsg = NULL;
|
||||||
|
int rc = sqlite3_exec(db, "INSERT INTO v(v) VALUES ('bogus')", NULL, NULL, &errmsg);
|
||||||
|
assert(rc != SQLITE_OK);
|
||||||
|
assert(errmsg != NULL);
|
||||||
|
assert(strstr(errmsg, "nknown") != NULL || strstr(errmsg, "unknown") != NULL);
|
||||||
|
sqlite3_free(errmsg);
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
printf(" Passed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
printf("Starting unit tests...\n");
|
printf("Starting unit tests...\n");
|
||||||
#ifdef SQLITE_VEC_ENABLE_AVX
|
#ifdef SQLITE_VEC_ENABLE_AVX
|
||||||
|
|
@ -677,5 +923,12 @@ int main() {
|
||||||
test_distance_l2_sqr_float();
|
test_distance_l2_sqr_float();
|
||||||
test_distance_cosine_float();
|
test_distance_cosine_float();
|
||||||
test_distance_hamming();
|
test_distance_hamming();
|
||||||
|
test_optimize_basic();
|
||||||
|
test_optimize_full_compaction();
|
||||||
|
test_optimize_empty_table();
|
||||||
|
test_optimize_noop_full_chunk();
|
||||||
|
test_optimize_knn_after();
|
||||||
|
test_optimize_insert_after();
|
||||||
|
test_optimize_unknown_command();
|
||||||
printf("All unit tests passed.\n");
|
printf("All unit tests passed.\n");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue