mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-24 16:26:37 +02:00
Complete vec0 DELETE: zero data, reclaim empty chunks, fix metadata rc bug
When a row is deleted from a vec0 virtual table, the rowid slot in _chunks.rowids and vector data in _vector_chunksNN.vectors are now zeroed out (previously left as stale data, tracked in #54). When all rows in a chunk are deleted (validity bitmap all zeros), the chunk and its associated vector/metadata shadow table rows are reclaimed. - Add vec0Update_Delete_ClearRowid to zero the rowid blob slot - Add vec0Update_Delete_ClearVectors to zero all vector blob slots - Add vec0Update_Delete_DeleteChunkIfEmpty to detect and delete fully-empty chunks from _chunks, _vector_chunksNN, _metadatachunksNN - Fix missing rc check in ClearMetadata loop (bug: errors were silently ignored) - Fix vec0_new_chunk to explicitly set _rowid_ on shadow table INSERTs (SHADOW_TABLE_ROWID_QUIRK: "rowid PRIMARY KEY" without INTEGER type is not a true rowid alias, causing blob_open failures after chunk delete+recreate cycles) - Add 13 new tests covering rowid/vector zeroing, chunk reclamation, metadata/auxiliary/partition/text-PK/int8/bit variants, and page_count shrinkage verification - Add vec0-delete-completeness fuzz target - Update snapshots for new delete zeroing behavior Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b669801d31
commit
56707c4c09
6 changed files with 732 additions and 24 deletions
262
sqlite-vec.c
262
sqlite-vec.c
|
|
@ -3447,6 +3447,13 @@ static sqlite3_module vec_npy_eachModule = {
|
|||
#define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""
|
||||
|
||||
/// 1) schema, 2) original vtab table name
|
||||
//
|
||||
// IMPORTANT: "rowid" is declared as PRIMARY KEY but WITHOUT the INTEGER type.
|
||||
// This means it is NOT a true SQLite rowid alias — the user-defined "rowid"
|
||||
// column and the internal SQLite rowid (_rowid_) are two separate values.
|
||||
// When inserting, both must be set explicitly to keep them in sync. See the
|
||||
// _rowid_ bindings in vec0_new_chunk() and the explanation in
|
||||
// SHADOW_TABLE_ROWID_QUIRK below.
|
||||
#define VEC0_SHADOW_VECTOR_N_CREATE \
|
||||
"CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME "(" \
|
||||
"rowid PRIMARY KEY," \
|
||||
|
|
@ -4506,6 +4513,20 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk
|
|||
|
||||
// Step 2: Create new vector chunks for each vector column, with
|
||||
// that new chunk_rowid.
|
||||
//
|
||||
// SHADOW_TABLE_ROWID_QUIRK: The _vector_chunksNN and _metadatachunksNN
|
||||
// shadow tables declare "rowid PRIMARY KEY" without the INTEGER type, so
|
||||
// the user-defined "rowid" column is NOT an alias for the internal SQLite
|
||||
// rowid (_rowid_). When only appending rows these two happen to stay in
|
||||
// sync, but after a chunk is deleted (vec0Update_Delete_DeleteChunkIfEmpty)
|
||||
// and a new one is created, the auto-assigned _rowid_ can diverge from the
|
||||
// user "rowid" value. Since sqlite3_blob_open() addresses rows by internal
|
||||
// _rowid_, we must explicitly set BOTH _rowid_ and "rowid" to the same
|
||||
// value so that later blob operations can find the row.
|
||||
//
|
||||
// The correct long-term fix is changing the schema to
|
||||
// "rowid INTEGER PRIMARY KEY"
|
||||
// which makes it a true alias, but that would break existing databases.
|
||||
|
||||
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
|
||||
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
|
||||
|
|
@ -4515,9 +4536,10 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk
|
|||
i64 vectorsSize =
|
||||
p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]);
|
||||
|
||||
// See SHADOW_TABLE_ROWID_QUIRK above for why _rowid_ and rowid are both set.
|
||||
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME
|
||||
"(rowid, vectors)"
|
||||
"VALUES (?, ?)",
|
||||
"(_rowid_, rowid, vectors)"
|
||||
"VALUES (?, ?, ?)",
|
||||
p->schemaName, p->tableName, vector_column_idx);
|
||||
if (!zSql) {
|
||||
return SQLITE_NOMEM;
|
||||
|
|
@ -4530,8 +4552,9 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk
|
|||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_int64(stmt, 1, rowid);
|
||||
sqlite3_bind_zeroblob64(stmt, 2, vectorsSize);
|
||||
sqlite3_bind_int64(stmt, 1, rowid); // _rowid_ (internal SQLite rowid)
|
||||
sqlite3_bind_int64(stmt, 2, rowid); // rowid (user-defined column)
|
||||
sqlite3_bind_zeroblob64(stmt, 3, vectorsSize);
|
||||
|
||||
rc = sqlite3_step(stmt);
|
||||
sqlite3_finalize(stmt);
|
||||
|
|
@ -4546,9 +4569,10 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk
|
|||
continue;
|
||||
}
|
||||
int metadata_column_idx = p->user_column_idxs[i];
|
||||
// See SHADOW_TABLE_ROWID_QUIRK above for why _rowid_ and rowid are both set.
|
||||
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME
|
||||
"(rowid, data)"
|
||||
"VALUES (?, ?)",
|
||||
"(_rowid_, rowid, data)"
|
||||
"VALUES (?, ?, ?)",
|
||||
p->schemaName, p->tableName, metadata_column_idx);
|
||||
if (!zSql) {
|
||||
return SQLITE_NOMEM;
|
||||
|
|
@ -4561,8 +4585,9 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk
|
|||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_int64(stmt, 1, rowid);
|
||||
sqlite3_bind_zeroblob64(stmt, 2, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size));
|
||||
sqlite3_bind_int64(stmt, 1, rowid); // _rowid_ (internal SQLite rowid)
|
||||
sqlite3_bind_int64(stmt, 2, rowid); // rowid (user-defined column)
|
||||
sqlite3_bind_zeroblob64(stmt, 3, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size));
|
||||
|
||||
rc = sqlite3_step(stmt);
|
||||
sqlite3_finalize(stmt);
|
||||
|
|
@ -5126,6 +5151,8 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
|||
sqlite3_finalize(stmt);
|
||||
}
|
||||
|
||||
// See SHADOW_TABLE_ROWID_QUIRK in vec0_new_chunk() — same "rowid PRIMARY KEY"
|
||||
// without INTEGER type issue applies here.
|
||||
for (int i = 0; i < pNew->numMetadataColumns; i++) {
|
||||
char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid PRIMARY KEY, data BLOB NOT NULL);",
|
||||
pNew->schemaName, pNew->tableName, i);
|
||||
|
|
@ -8574,6 +8601,200 @@ cleanup:
|
|||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
int vec0Update_Delete_ClearRowid(vec0_vtab *p, i64 chunk_id,
|
||||
u64 chunk_offset) {
|
||||
int rc, brc;
|
||||
sqlite3_blob *blobChunksRowids = NULL;
|
||||
i64 zero = 0;
|
||||
|
||||
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
|
||||
chunk_id, 1, &blobChunksRowids);
|
||||
if (rc != SQLITE_OK) {
|
||||
vtab_set_error(&p->base, "could not open rowids blob for %s.%s.%lld",
|
||||
p->schemaName, p->shadowChunksName, chunk_id);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
rc = sqlite3_blob_write(blobChunksRowids, &zero, sizeof(zero),
|
||||
chunk_offset * sizeof(i64));
|
||||
if (rc != SQLITE_OK) {
|
||||
vtab_set_error(&p->base,
|
||||
"could not write to rowids blob for %s.%s.%lld at %llu",
|
||||
p->schemaName, p->shadowChunksName, chunk_id, chunk_offset);
|
||||
}
|
||||
|
||||
brc = sqlite3_blob_close(blobChunksRowids);
|
||||
if (rc != SQLITE_OK)
|
||||
return rc;
|
||||
if (brc != SQLITE_OK) {
|
||||
vtab_set_error(&p->base,
|
||||
"vec0 deletion error: Error commiting rowids blob "
|
||||
"transaction on %s.%s.%lld at %llu",
|
||||
p->schemaName, p->shadowChunksName, chunk_id, chunk_offset);
|
||||
return brc;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
int vec0Update_Delete_ClearVectors(vec0_vtab *p, i64 chunk_id,
|
||||
u64 chunk_offset) {
|
||||
int rc, brc;
|
||||
for (int i = 0; i < p->numVectorColumns; i++) {
|
||||
sqlite3_blob *blobVectors = NULL;
|
||||
size_t n = vector_column_byte_size(p->vector_columns[i]);
|
||||
|
||||
rc = sqlite3_blob_open(p->db, p->schemaName,
|
||||
p->shadowVectorChunksNames[i], "vectors",
|
||||
chunk_id, 1, &blobVectors);
|
||||
if (rc != SQLITE_OK) {
|
||||
vtab_set_error(&p->base,
|
||||
"could not open vector blob for %s.%s.%lld column %d",
|
||||
p->schemaName, p->shadowVectorChunksNames[i], chunk_id, i);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
void *zeroBuf = sqlite3_malloc(n);
|
||||
if (!zeroBuf) {
|
||||
sqlite3_blob_close(blobVectors);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(zeroBuf, 0, n);
|
||||
|
||||
rc = sqlite3_blob_write(blobVectors, zeroBuf, n, chunk_offset * n);
|
||||
sqlite3_free(zeroBuf);
|
||||
if (rc != SQLITE_OK) {
|
||||
vtab_set_error(
|
||||
&p->base,
|
||||
"could not write to vector blob for %s.%s.%lld at %llu column %d",
|
||||
p->schemaName, p->shadowVectorChunksNames[i], chunk_id,
|
||||
chunk_offset, i);
|
||||
}
|
||||
|
||||
brc = sqlite3_blob_close(blobVectors);
|
||||
if (rc != SQLITE_OK)
|
||||
return rc;
|
||||
if (brc != SQLITE_OK) {
|
||||
vtab_set_error(&p->base,
|
||||
"vec0 deletion error: Error commiting vector blob "
|
||||
"transaction on %s.%s.%lld column %d",
|
||||
p->schemaName, p->shadowVectorChunksNames[i], chunk_id, i);
|
||||
return brc;
|
||||
}
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/**
 * Run one "DELETE ... WHERE rowid = ?" statement with `chunk_id` bound to
 * the single parameter.
 *
 * Takes ownership of `zSql` (expected to come from sqlite3_mprintf()) and
 * frees it. A NULL `zSql` is treated as an allocation failure.
 *
 * @return SQLITE_OK on success, SQLITE_NOMEM if `zSql` is NULL, the
 *         sqlite3_prepare_v2() code if preparation fails, or SQLITE_ERROR if
 *         the statement does not run to completion. A vtab error message is
 *         set for prepare/step failures.
 */
static int vec0Update_Delete_ExecChunkDelete(vec0_vtab *p, char *zSql,
                                             i64 chunk_id) {
  sqlite3_stmt *stmt = NULL;
  int rc;

  if (!zSql) {
    return SQLITE_NOMEM;
  }

  rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  sqlite3_free(zSql);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "vec0 deletion error: could not prepare delete of empty "
                   "chunk %lld: %s",
                   chunk_id, sqlite3_errmsg(p->db));
    return rc;
  }

  sqlite3_bind_int64(stmt, 1, chunk_id);
  rc = sqlite3_step(stmt);
  sqlite3_finalize(stmt);
  if (rc != SQLITE_DONE) {
    vtab_set_error(&p->base,
                   "vec0 deletion error: could not delete empty chunk %lld: %s",
                   chunk_id, sqlite3_errmsg(p->db));
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}

/**
 * Reclaim a chunk once every bit of its validity bitmap is zero.
 *
 * Reads the whole "validity" blob of row `chunk_id` in the _chunks shadow
 * table. If any byte is non-zero the chunk is left untouched. Otherwise the
 * chunk row is deleted from _chunks, from every _vector_chunksNN table and
 * from every _metadatachunksNN table, and the cached `stmtLatestChunk`
 * statement is finalized so it is prepared again on next use.
 *
 * @param p         the vec0 virtual table
 * @param chunk_id  rowid of the chunk to inspect
 * @param deleted   out: set to 1 if the chunk was removed, 0 otherwise
 *                  (always written, including on error paths after entry)
 * @return SQLITE_OK on success (whether or not the chunk was deleted);
 *         SQLITE_ERROR if the validity blob cannot be opened or a DELETE
 *         does not complete; SQLITE_NOMEM on allocation failure; otherwise
 *         the code from the failing blob read/close or statement prepare.
 */
int vec0Update_Delete_DeleteChunkIfEmpty(vec0_vtab *p, i64 chunk_id,
                                         int *deleted) {
  int rc, brc;
  sqlite3_blob *blobValidity = NULL;
  *deleted = 0;

  // Read-only handle (flags = 0): the bitmap is only inspected here.
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
                         chunk_id, 0, &blobValidity);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "could not open validity blob for chunk %lld", chunk_id);
    return SQLITE_ERROR;
  }

  int validitySize = sqlite3_blob_bytes(blobValidity);
  unsigned char *validityBuf = sqlite3_malloc(validitySize);
  if (!validityBuf) {
    sqlite3_blob_close(blobValidity);
    return SQLITE_NOMEM;
  }

  rc = sqlite3_blob_read(blobValidity, validityBuf, validitySize, 0);
  brc = sqlite3_blob_close(blobValidity);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "could not read validity blob for chunk %lld", chunk_id);
    sqlite3_free(validityBuf);
    return rc;
  }
  if (brc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "could not close validity blob for chunk %lld", chunk_id);
    sqlite3_free(validityBuf);
    return brc;
  }

  int allZero = 1;
  for (int i = 0; i < validitySize; i++) {
    if (validityBuf[i] != 0) {
      allZero = 0;
      break;
    }
  }
  sqlite3_free(validityBuf);

  if (!allZero) {
    // At least one slot is still in use; keep the chunk.
    return SQLITE_OK;
  }

  // All validity bits are zero — delete this chunk and its associated data.

  // Delete from _chunks
  rc = vec0Update_Delete_ExecChunkDelete(
      p,
      sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_CHUNKS_NAME
                      " WHERE rowid = ?",
                      p->schemaName, p->tableName),
      chunk_id);
  if (rc != SQLITE_OK) {
    return rc;
  }

  // Delete from each _vector_chunksNN
  for (int i = 0; i < p->numVectorColumns; i++) {
    rc = vec0Update_Delete_ExecChunkDelete(
        p,
        sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_VECTOR_N_NAME
                        " WHERE rowid = ?",
                        p->schemaName, p->tableName, i),
        chunk_id);
    if (rc != SQLITE_OK) {
      return rc;
    }
  }

  // Delete from each _metadatachunksNN
  for (int i = 0; i < p->numMetadataColumns; i++) {
    rc = vec0Update_Delete_ExecChunkDelete(
        p,
        sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_N_NAME
                        " WHERE rowid = ?",
                        p->schemaName, p->tableName, i),
        chunk_id);
    if (rc != SQLITE_OK) {
      return rc;
    }
  }

  // Invalidate cached stmtLatestChunk so it gets re-prepared on next insert
  if (p->stmtLatestChunk) {
    sqlite3_finalize(p->stmtLatestChunk);
    p->stmtLatestChunk = NULL;
  }

  *deleted = 1;
  return SQLITE_OK;
}
|
||||
|
||||
int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
|
||||
int rc;
|
||||
sqlite3_stmt *stmt = NULL;
|
||||
|
|
@ -8735,16 +8956,23 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
|
|||
return rc;
|
||||
}
|
||||
|
||||
// 2. clear validity bit
|
||||
rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset);
|
||||
if (rc != SQLITE_OK) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
// 3. zero out rowid in chunks.rowids
|
||||
// https://github.com/asg017/sqlite-vec/issues/54
|
||||
rc = vec0Update_Delete_ClearRowid(p, chunk_id, chunk_offset);
|
||||
if (rc != SQLITE_OK) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
// 4. zero out any data in vector chunks tables
|
||||
// https://github.com/asg017/sqlite-vec/issues/54
|
||||
rc = vec0Update_Delete_ClearVectors(p, chunk_id, chunk_offset);
|
||||
if (rc != SQLITE_OK) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
// 5. delete from _rowids table
|
||||
rc = vec0Update_Delete_DeleteRowids(p, rowid);
|
||||
|
|
@ -8760,9 +8988,21 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
|
|||
}
|
||||
}
|
||||
|
||||
// 6. delete metadata
|
||||
// 7. delete metadata
|
||||
for(int i = 0; i < p->numMetadataColumns; i++) {
|
||||
rc = vec0Update_Delete_ClearMetadata(p, i, rowid, chunk_id, chunk_offset);
|
||||
if (rc != SQLITE_OK) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
// 8. reclaim chunk if fully empty
|
||||
{
|
||||
int chunkDeleted;
|
||||
rc = vec0Update_Delete_DeleteChunkIfEmpty(p, chunk_id, &chunkDeleted);
|
||||
if (rc != SQLITE_OK) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@
|
|||
'chunk_id': 1,
|
||||
'size': 8,
|
||||
'validity': b'\x06',
|
||||
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -163,7 +163,7 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'vectors': b'\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'vectors': b'\x00\x00\x00\x00\x00\x00\x00@\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@
|
|||
OrderedDict({
|
||||
'chunk_id': 1,
|
||||
'size': 8,
|
||||
'validity': b'\x02',
|
||||
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'validity': b'\x06',
|
||||
'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -37,7 +37,7 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'data': b'\x02',
|
||||
'data': b'\x06',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -46,7 +46,7 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -55,7 +55,7 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x9a\x99\x99\x99\x99\x99\x01@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -64,13 +64,17 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'data': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00test2\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00123456789012\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_metadatatext03': OrderedDict({
|
||||
'sql': 'select * from v_metadatatext03',
|
||||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 3,
|
||||
'data': '1234567890123',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_rowids': OrderedDict({
|
||||
|
|
@ -82,6 +86,12 @@
|
|||
'chunk_id': 1,
|
||||
'chunk_offset': 1,
|
||||
}),
|
||||
OrderedDict({
|
||||
'rowid': 3,
|
||||
'id': None,
|
||||
'chunk_id': 1,
|
||||
'chunk_offset': 2,
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
'v_vector_chunks00': OrderedDict({
|
||||
|
|
@ -89,7 +99,7 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'vectors': b'\x00\x00\x00\x00""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -264,7 +274,7 @@
|
|||
'chunk_id': 1,
|
||||
'size': 8,
|
||||
'validity': b'\x06',
|
||||
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -335,7 +345,7 @@
|
|||
'rows': list([
|
||||
OrderedDict({
|
||||
'rowid': 1,
|
||||
'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
'vectors': b'\x00\x00\x00\x00""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
|
|
@ -360,6 +370,14 @@
|
|||
'f': 2.2,
|
||||
't': 'test2',
|
||||
}),
|
||||
OrderedDict({
|
||||
'rowid': 3,
|
||||
'vector': b'3333',
|
||||
'b': 1,
|
||||
'n': 3,
|
||||
'f': 3.3,
|
||||
't': '1234567890123',
|
||||
}),
|
||||
]),
|
||||
})
|
||||
# ---
|
||||
|
|
|
|||
|
|
@ -69,9 +69,13 @@ $(TARGET_DIR)/vec_each: vec-each.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
|||
$(TARGET_DIR)/vec_mismatch: vec-mismatch.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||
|
||||
$(TARGET_DIR)/vec0_delete_completeness: vec0-delete-completeness.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||
|
||||
FUZZ_TARGETS = vec0_create exec json numpy \
|
||||
shadow_corrupt vec0_operations scalar_functions \
|
||||
vec0_create_full metadata_columns vec_each vec_mismatch
|
||||
vec0_create_full metadata_columns vec_each vec_mismatch \
|
||||
vec0_delete_completeness
|
||||
|
||||
all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS))
|
||||
|
||||
|
|
|
|||
114
tests/fuzz/vec0-delete-completeness.c
Normal file
114
tests/fuzz/vec0-delete-completeness.c
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "sqlite-vec.h"
|
||||
#include "sqlite3.h"
|
||||
#include <assert.h>
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
if (size < 6) return 0;
|
||||
|
||||
int rc;
|
||||
sqlite3 *db;
|
||||
sqlite3_stmt *stmtInsert = NULL;
|
||||
sqlite3_stmt *stmtDelete = NULL;
|
||||
sqlite3_stmt *stmtScan = NULL;
|
||||
sqlite3_stmt *stmtCount = NULL;
|
||||
|
||||
rc = sqlite3_open(":memory:", &db);
|
||||
assert(rc == SQLITE_OK);
|
||||
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||
assert(rc == SQLITE_OK);
|
||||
|
||||
rc = sqlite3_exec(db,
|
||||
"CREATE VIRTUAL TABLE v USING vec0(emb float[4], chunk_size=4)",
|
||||
NULL, NULL, NULL);
|
||||
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
|
||||
|
||||
sqlite3_prepare_v2(db,
|
||||
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||
sqlite3_prepare_v2(db,
|
||||
"DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
|
||||
sqlite3_prepare_v2(db,
|
||||
"SELECT rowid FROM v", -1, &stmtScan, NULL);
|
||||
|
||||
if (!stmtInsert || !stmtDelete || !stmtScan) goto cleanup;
|
||||
|
||||
size_t i = 0;
|
||||
while (i + 2 <= size) {
|
||||
uint8_t op = data[i++] % 3;
|
||||
uint8_t rowid_byte = data[i++];
|
||||
int64_t rowid = (int64_t)(rowid_byte % 16) + 1;
|
||||
|
||||
switch (op) {
|
||||
case 0: {
|
||||
// INSERT
|
||||
float vec[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
for (int j = 0; j < 4 && i < size; j++, i++) {
|
||||
vec[j] = (float)((int8_t)data[i]) / 10.0f;
|
||||
}
|
||||
sqlite3_reset(stmtInsert);
|
||||
sqlite3_bind_int64(stmtInsert, 1, rowid);
|
||||
sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
|
||||
sqlite3_step(stmtInsert);
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
// DELETE
|
||||
sqlite3_reset(stmtDelete);
|
||||
sqlite3_bind_int64(stmtDelete, 1, rowid);
|
||||
sqlite3_step(stmtDelete);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
// Full scan
|
||||
sqlite3_reset(stmtScan);
|
||||
while (sqlite3_step(stmtScan) == SQLITE_ROW) {}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Delete all remaining rows
|
||||
sqlite3_exec(db, "DELETE FROM v", NULL, NULL, NULL);
|
||||
|
||||
// Assert all shadow tables are empty after full deletion
|
||||
sqlite3_prepare_v2(db,
|
||||
"SELECT count(*) FROM v_rowids", -1, &stmtCount, NULL);
|
||||
if (stmtCount) {
|
||||
rc = sqlite3_step(stmtCount);
|
||||
assert(rc == SQLITE_ROW);
|
||||
assert(sqlite3_column_int(stmtCount, 0) == 0);
|
||||
sqlite3_finalize(stmtCount);
|
||||
stmtCount = NULL;
|
||||
}
|
||||
|
||||
sqlite3_prepare_v2(db,
|
||||
"SELECT count(*) FROM v_chunks", -1, &stmtCount, NULL);
|
||||
if (stmtCount) {
|
||||
rc = sqlite3_step(stmtCount);
|
||||
assert(rc == SQLITE_ROW);
|
||||
assert(sqlite3_column_int(stmtCount, 0) == 0);
|
||||
sqlite3_finalize(stmtCount);
|
||||
stmtCount = NULL;
|
||||
}
|
||||
|
||||
sqlite3_prepare_v2(db,
|
||||
"SELECT count(*) FROM v_vector_chunks00", -1, &stmtCount, NULL);
|
||||
if (stmtCount) {
|
||||
rc = sqlite3_step(stmtCount);
|
||||
assert(rc == SQLITE_ROW);
|
||||
assert(sqlite3_column_int(stmtCount, 0) == 0);
|
||||
sqlite3_finalize(stmtCount);
|
||||
stmtCount = NULL;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
sqlite3_finalize(stmtInsert);
|
||||
sqlite3_finalize(stmtDelete);
|
||||
sqlite3_finalize(stmtScan);
|
||||
sqlite3_close(db);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import sqlite3
|
||||
import struct
|
||||
import pytest
|
||||
from helpers import _f32, exec
|
||||
from helpers import _f32, _i64, _int8, exec
|
||||
|
||||
|
||||
def test_insert_creates_chunks_and_vectors(db, snapshot):
|
||||
|
|
@ -147,3 +147,335 @@ def test_insert_validates_type(db):
|
|||
def test_info_table_contents(db, snapshot):
    """Snapshot the key/value rows of v_info right after creating a vec0 table."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")
    info_rows = exec(db, "select key, value from v_info order by key")
    assert info_rows == snapshot()
|
||||
|
||||
|
||||
def test_delete_zeroes_rowid_blob(db):
    """Deleting a row zeroes its slot in the chunk's rowids blob; neighbours stay intact."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    for rowid in (1, 2, 3):
        params = [rowid, _f32([float(rowid)] * 4)]
        db.execute("insert into v(rowid, emb) values (?, ?)", params)

    db.execute("delete from v where rowid = 2")

    rowids_blob = db.execute(
        "select rowids from v_chunks where rowid = 1"
    ).fetchone()[0]
    slots = struct.unpack("<8q", rowids_blob)

    # Slots 0 and 2 still hold rowids 1 and 3; slot 1 (formerly rowid 2) is 0.
    assert slots[:3] == (1, 0, 3)
|
||||
|
||||
|
||||
def test_delete_zeroes_vector_blob(db):
    """Deleting a row zeroes its vector slot and leaves the next slot unchanged."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    doomed = [1.0, 2.0, 3.0, 4.0]
    survivor = [5.0, 6.0, 7.0, 8.0]
    db.execute("insert into v(rowid, emb) values (1, ?)", [_f32(doomed)])
    db.execute("insert into v(rowid, emb) values (2, ?)", [_f32(survivor)])

    db.execute("delete from v where rowid = 1")

    vectors = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()[0]

    slot_size = 16  # 4 floats = 16 bytes per slot
    slot0 = struct.unpack("<4f", vectors[:slot_size])
    slot1 = struct.unpack("<4f", vectors[slot_size : 2 * slot_size])

    assert slot0 == (0.0, 0.0, 0.0, 0.0)  # deleted row's slot is cleared
    assert slot1 == tuple(survivor)  # remaining row's data is untouched
|
||||
|
||||
|
||||
def test_delete_all_rows_deletes_chunk(db):
    """Deleting all 8 rows of a chunk_size=8 table removes its chunk rows; inserts still work."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    def scalar(sql):
        return db.execute(sql).fetchone()[0]

    rowids = list(range(1, 9))
    for rowid in rowids:
        db.execute(
            "insert into v(rowid, emb) values (?, ?)",
            [rowid, _f32([float(rowid)] * 4)],
        )
    for rowid in rowids:
        db.execute("delete from v where rowid = ?", [rowid])

    assert scalar("select count(*) from v_chunks") == 0
    assert scalar("select count(*) from v_vector_chunks00") == 0

    # Inserting after full deletion still works
    nines = _f32([9.0, 9.0, 9.0, 9.0])
    db.execute("insert into v(rowid, emb) values (100, ?)", [nines])
    fetched = db.execute("select emb from v where rowid = 100").fetchone()
    assert fetched[0] == _f32([9.0, 9.0, 9.0, 9.0])
|
||||
|
||||
|
||||
def test_delete_chunk_multiple_chunks(db):
    """Emptying the first of two chunks removes only that chunk; the other rows stay readable."""
    db.execute("create virtual table v using vec0(emb float[4], chunk_size=8)")

    def vector_for(rowid):
        return _f32([float(rowid)] * 4)

    for rowid in range(1, 17):
        db.execute(
            "insert into v(rowid, emb) values (?, ?)", [rowid, vector_for(rowid)]
        )

    # Delete all rows from the first chunk (rows 1-8)
    for rowid in range(1, 9):
        db.execute("delete from v where rowid = ?", [rowid])

    # Only 1 chunk should remain
    remaining_chunks = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert remaining_chunks == 1

    # Rows 9-16 still queryable
    for rowid in range(9, 17):
        fetched = db.execute("select emb from v where rowid = ?", [rowid]).fetchone()
        assert fetched[0] == vector_for(rowid)
|
||||
|
||||
|
||||
def test_delete_with_metadata_columns(db):
    """Emptying a chunk also removes its rows from every metadata-chunk shadow table."""
    db.execute(
        "create virtual table v using vec0("
        "emb float[4], "
        "m_bool boolean, "
        "m_int integer, "
        "m_float float, "
        "m_text text, "
        "chunk_size=8"
        ")"
    )

    insert_sql = (
        "insert into v(rowid, emb, m_bool, m_int, m_float, m_text) "
        "values (?, ?, ?, ?, ?, ?)"
    )
    for rowid in range(1, 9):
        row = [
            rowid,
            _f32([float(rowid)] * 4),
            rowid % 2 == 0,
            rowid * 10,
            float(rowid) / 2.0,
            f"text_{rowid}",
        ]
        db.execute(insert_sql, row)

    for rowid in range(1, 9):
        db.execute("delete from v where rowid = ?", [rowid])

    # One chunk table, one vector-chunk table, and one metadata-chunk table
    # per metadata column (bool, int, float, text) — all must be empty.
    count_queries = (
        "select count(*) from v_chunks",
        "select count(*) from v_vector_chunks00",
        "select count(*) from v_metadatachunks00",
        "select count(*) from v_metadatachunks01",
        "select count(*) from v_metadatachunks02",
        "select count(*) from v_metadatachunks03",
    )
    for query in count_queries:
        assert db.execute(query).fetchone()[0] == 0
|
||||
|
||||
|
||||
def test_delete_with_auxiliary_columns(db):
    """Deleting every row leaves both v_chunks and v_auxiliary empty."""
    db.execute(
        "create virtual table v using vec0("
        "emb float[4], "
        "+aux_text text, "
        "chunk_size=8"
        ")"
    )

    rowids = list(range(1, 9))
    for rowid in rowids:
        row = [rowid, _f32([float(rowid)] * 4), f"aux_{rowid}"]
        db.execute("insert into v(rowid, emb, aux_text) values (?, ?, ?)", row)

    for rowid in rowids:
        db.execute("delete from v where rowid = ?", [rowid])

    chunk_count = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert chunk_count == 0
    aux_count = db.execute("select count(*) from v_auxiliary").fetchone()[0]
    assert aux_count == 0
|
||||
|
||||
|
||||
def test_delete_with_text_primary_key(db):
    """Delete-zeroing also applies when rows are addressed by a text primary key."""
    db.execute(
        "create virtual table v using vec0("
        "id text primary key, emb float[4], chunk_size=8"
        ")"
    )

    vec_a = _f32([1.0, 2.0, 3.0, 4.0])
    vec_b = _f32([5.0, 6.0, 7.0, 8.0])
    db.execute("insert into v(id, emb) values ('a', ?)", [vec_a])
    db.execute("insert into v(id, emb) values ('b', ?)", [vec_b])

    db.execute("delete from v where id = 'a'")

    # Vector blob slot 0 should be zeroed
    vectors = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()[0]
    assert struct.unpack("<4f", vectors[:16]) == (0.0, 0.0, 0.0, 0.0)

    # Remaining row still queryable
    survivor = db.execute("select emb from v where id = 'b'").fetchone()
    assert survivor[0] == _f32([5.0, 6.0, 7.0, 8.0])
|
||||
|
||||
|
||||
def test_delete_with_partition_keys(db):
    """Emptying partition 'A' leaves exactly one chunk (partition 'B's), keeps
    every 'B' row readable, and still allows a new insert into 'A'."""
    db.execute(
        "create virtual table v using vec0("
        "part text partition key, emb float[4], chunk_size=8"
        ")"
    )

    rowids_a = range(1, 9)
    rowids_b = range(9, 17)

    for rowid in rowids_a:
        vector = _f32([float(rowid)] * 4)
        db.execute(
            "insert into v(rowid, part, emb) values (?, 'A', ?)",
            [rowid, vector],
        )
    for rowid in rowids_b:
        vector = _f32([float(rowid)] * 4)
        db.execute(
            "insert into v(rowid, part, emb) values (?, 'B', ?)",
            [rowid, vector],
        )

    # Remove every row that belongs to partition A.
    for rowid in rowids_a:
        db.execute("delete from v where rowid = ?", [rowid])

    # Only partition B's chunk should be left.
    remaining_chunks = db.execute("select count(*) from v_chunks").fetchone()[0]
    assert remaining_chunks == 1

    # Every partition B row still reads back its original vector.
    for rowid in rowids_b:
        fetched = db.execute("select emb from v where rowid = ?", [rowid]).fetchone()
        assert fetched[0] == _f32([float(rowid)] * 4)

    # Partition A accepts a fresh row after having been emptied.
    db.execute(
        "insert into v(rowid, part, emb) values (100, 'A', ?)",
        [_f32([99.0, 99.0, 99.0, 99.0])],
    )
    reinserted = db.execute("select emb from v where rowid = 100").fetchone()
    assert reinserted[0] == _f32([99.0, 99.0, 99.0, 99.0])
|
||||
|
||||
|
||||
def test_delete_int8_vectors(db):
    """Deleting an int8 row zeroes its 4-byte slot in the vector blob and
    leaves the neighbouring slot unchanged."""
    db.execute("create virtual table v using vec0(emb int8[4], chunk_size=8)")

    inserts = (
        ("insert into v(rowid, emb) values (1, vec_int8(?))", [1, 2, 3, 4]),
        ("insert into v(rowid, emb) values (2, vec_int8(?))", [5, 6, 7, 8]),
    )
    for sql, values in inserts:
        db.execute(sql, [_int8(values)])

    db.execute("delete from v where rowid = 1")

    chunk_row = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()
    vectors = chunk_row[0]

    # int8[4] occupies 4 bytes per slot: slot 0 is bytes 0-3, slot 1 is bytes 4-7.
    slot0 = struct.unpack_from("<4b", vectors, 0)
    assert slot0 == (0, 0, 0, 0)
    slot1 = struct.unpack_from("<4b", vectors, 4)
    assert slot1 == (5, 6, 7, 8)
|
||||
|
||||
|
||||
def test_delete_bit_vectors(db):
    """Deleting a bit-vector row zeroes its 1-byte slot in the vector blob and
    leaves the neighbouring slot unchanged."""
    db.execute("create virtual table v using vec0(emb bit[8], chunk_size=8)")

    inserts = (
        ("insert into v(rowid, emb) values (1, vec_bit(?))", bytes([0xFF])),
        ("insert into v(rowid, emb) values (2, vec_bit(?))", bytes([0xAA])),
    )
    for sql, payload in inserts:
        db.execute(sql, [payload])

    db.execute("delete from v where rowid = 1")

    chunk_row = db.execute(
        "select vectors from v_vector_chunks00 where rowid = 1"
    ).fetchone()
    vectors = chunk_row[0]

    # bit[8] occupies one byte per slot: byte 0 is the deleted row, byte 1 the kept one.
    assert vectors[:1] == b"\x00"
    assert vectors[1:2] == b"\xaa"
|
||||
|
||||
|
||||
def _file_db(tmp_path):
    """Return a connection to ``test.db`` inside ``tmp_path`` with the vec0
    extension loaded.

    A file-backed DB is required for page_count to shrink after VACUUM.
    """
    db_file = tmp_path / "test.db"
    conn = sqlite3.connect(str(db_file))
    conn.row_factory = sqlite3.Row

    # Extension loading is switched on only around the load call itself.
    conn.enable_load_extension(True)
    conn.load_extension("dist/vec0")
    conn.enable_load_extension(False)

    return conn
|
||||
|
||||
|
||||
def test_delete_chunk_shrinks_pages(tmp_path):
    """Use large vectors (float[256]) so each chunk blob spans multiple pages,
    making the page_count difference measurable after VACUUM.

    Inserts 24 rows (3 full chunks of 8), deletes them all, checks that no
    chunk rows remain, then checks that VACUUM reduces ``pragma page_count``.
    The connection is closed in a ``finally`` block so that a failing
    assertion does not leave the file-backed database open.
    """
    dims = 256
    db = _file_db(tmp_path)
    try:
        db.execute(
            f"create virtual table v using vec0(emb float[{dims}], chunk_size=8)"
        )

        for i in range(1, 25):  # 3 full chunks of 8
            db.execute(
                "insert into v(rowid, emb) values (?, ?)",
                [i, _f32([float(i)] * dims)],
            )
        db.commit()
        pages_before = db.execute("pragma page_count").fetchone()[0]

        # Delete all rows
        for i in range(1, 25):
            db.execute("delete from v where rowid = ?", [i])
        db.commit()

        assert db.execute("select count(*) from v_chunks").fetchone()[0] == 0

        db.execute("vacuum")
        pages_after = db.execute("pragma page_count").fetchone()[0]
        assert pages_after < pages_before, (
            "page_count should shrink after deleting all chunks and vacuum: "
            f"{pages_before} -> {pages_after}"
        )
    finally:
        # Always release the file handle, even when an assertion above fails.
        db.close()
|
||||
|
||||
|
||||
def test_delete_one_chunk_of_two_shrinks_pages(tmp_path):
    """Use large vectors (float[256]) so each chunk blob spans multiple pages,
    making the page_count difference measurable after VACUUM.

    Inserts 16 rows (2 full chunks of 8), deletes rows 1-8, checks that one
    chunk row remains, that VACUUM reduces ``pragma page_count``, and that
    rows 9-16 still read back their original vectors afterwards.
    The connection is closed in a ``finally`` block so that a failing
    assertion does not leave the file-backed database open.
    """
    dims = 256
    db = _file_db(tmp_path)
    try:
        db.execute(
            f"create virtual table v using vec0(emb float[{dims}], chunk_size=8)"
        )

        for i in range(1, 17):  # 2 full chunks of 8
            db.execute(
                "insert into v(rowid, emb) values (?, ?)",
                [i, _f32([float(i)] * dims)],
            )
        db.commit()
        pages_before = db.execute("pragma page_count").fetchone()[0]

        # Delete all rows from the first chunk (rows 1-8)
        for i in range(1, 9):
            db.execute("delete from v where rowid = ?", [i])
        db.commit()

        assert db.execute("select count(*) from v_chunks").fetchone()[0] == 1

        db.execute("vacuum")
        pages_after = db.execute("pragma page_count").fetchone()[0]
        assert pages_after < pages_before, (
            "page_count should shrink after deleting one chunk and vacuum: "
            f"{pages_before} -> {pages_after}"
        )

        # Remaining rows still queryable after vacuum
        for i in range(9, 17):
            row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
            assert row[0] == _f32([float(i)] * dims)
    finally:
        # Always release the file handle, even when an assertion above fails.
        db.close()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue