error handling on vec0 INSERT operations, introduce evidence-of

This commit is contained in:
Alex Garcia 2024-06-23 17:46:42 -07:00
parent f9e1bf2f34
commit 4196f16b27
3 changed files with 605 additions and 115 deletions

View file

@ -164,17 +164,21 @@ lint:
progress:
deno run --allow-read=sqlite-vec.c scripts/progress.ts
evidence-of:
@echo "EVIDENCE-OF: V$(shell printf "%05d" $$((RANDOM % 100000)))_$(shell printf "%05d" $$((RANDOM % 100000)))"
test:
sqlite3 :memory: '.read test.sql'
.PHONY: version loadable static test clean gh-release
.PHONY: version loadable static test clean gh-release evidence-of
publish-release:
./scripts/publish-release.sh
# -k test_vec0_update
test-loadable: loadable
$(PYTHON) -m pytest -vv -s tests/test-loadable.py
$(PYTHON) -m pytest -vv -s -x tests/test-loadable.py
test-loadable-snapshot-update: loadable
$(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update

View file

@ -2669,6 +2669,7 @@ struct vec0_vtab {
char *shadowChunksName;
// Name of all the vector chunk shadow tables.
// Ex '_vector_chunks00'
// Only the first numVectorColumns entries will be available.
// The first numVectorColumns entries must be freed with sqlite3_free()
char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS];
@ -4108,6 +4109,17 @@ static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
return SQLITE_OK;
}
#define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
#define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"
/**
 * @brief Replace the error message of a virtual table with a printf-style
 * formatted message.
 *
 * The message is built with sqlite3_vmprintf() and stored in pVTab->zErrMsg.
 * The previous message is released with sqlite3_free(). The new message is
 * formatted BEFORE the old one is freed, so it is safe for a caller to pass
 * the current pVTab->zErrMsg as one of the format arguments.
 *
 * @param pVTab virtual table whose zErrMsg is replaced
 * @param zFormat format string handed to sqlite3_vmprintf()
 * @param ... arguments consumed by zFormat
 */
void vec0_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
  va_list args;
  va_start(args, zFormat);
  char *zNewMsg = sqlite3_vmprintf(zFormat, args);
  va_end(args);

  // Free the old message only after formatting: the arguments may point
  // into it.
  sqlite3_free(pVTab->zErrMsg);
  pVTab->zErrMsg = zNewMsg;
}
/**
* @brief Handles the "insert rowid" step of a row insert operation of a vec0
* table.
@ -4135,51 +4147,120 @@ int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
// Option 3: vtab has a user-defined TEXT primary key, so ensure a text value
// is provided.
if (p->pkIsText) {
todo_assert(sqlite3_value_type(idValue) == SQLITE_TEXT);
if (sqlite3_value_type(idValue) != SQLITE_TEXT) {
// IMP: V04200_21039
vec0_set_error(&p->base,
"The %s virtual table was declared with a TEXT primary "
"key, but a non-TEXT value was provided in an INSERT.",
p->tableName);
return SQLITE_ERROR;
}
if (sqlite3_mutex_enter) {
sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
}
#ifdef SQLITE_VEC_THREADSAFE
sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
#endif
sqlite3_reset(p->stmtRowidsInsertId);
sqlite3_clear_bindings(p->stmtRowidsInsertId);
sqlite3_bind_value(p->stmtRowidsInsertId, 1, idValue);
rc = sqlite3_step(p->stmtRowidsInsertId);
todo_assert(rc == SQLITE_DONE);
if (rc != SQLITE_DONE) {
if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE) {
// IMP: V20497_04568
vec0_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
p->tableName);
} else {
// IMP: V24016_08086
vec0_set_error(
&p->base, "Error inserting into rowid shadow table: %s",
sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
}
rc = SQLITE_ERROR;
goto complete;
}
*rowid = sqlite3_last_insert_rowid(p->db);
#ifdef SQLITE_VEC_THREADSAFE
sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
#endif
rc = SQLITE_OK;
}
// Option 1: User supplied a i64 rowid
else if (sqlite3_value_type(idValue) == SQLITE_INTEGER) {
i64 suppliedRowid = sqlite3_value_int64(idValue);
sqlite3_reset(p->stmtRowidsInsertRowid);
sqlite3_clear_bindings(p->stmtRowidsInsertRowid);
sqlite3_bind_int64(p->stmtRowidsInsertRowid, 1, suppliedRowid);
rc = sqlite3_step(p->stmtRowidsInsertRowid);
todo_assert(rc == SQLITE_DONE);
*rowid = suppliedRowid;
}
// Option 2: User did not supply a rowid
else {
todo_assert(sqlite3_value_type(idValue) == SQLITE_NULL);
#ifdef SQLITE_VEC_THREADSAFE
sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
#endif
complete:
sqlite3_reset(p->stmtRowidsInsertId);
sqlite3_clear_bindings(p->stmtRowidsInsertId);
// no need to bind a value to ?1 here: needs to be NULL
// so we can get the next autoincremented rowid value.
rc = sqlite3_step(p->stmtRowidsInsertId);
todo_assert(rc == SQLITE_DONE);
*rowid = sqlite3_last_insert_rowid(p->db);
#ifdef SQLITE_VEC_THREADSAFE
sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
#endif
if (sqlite3_mutex_leave) {
sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
}
return rc;
}
return SQLITE_OK;
// Option 1: User supplied a i64 rowid
if (sqlite3_value_type(idValue) == SQLITE_INTEGER) {
i64 suppliedRowid = sqlite3_value_int64(idValue);
if (sqlite3_mutex_enter) {
sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
}
sqlite3_bind_int64(p->stmtRowidsInsertRowid, 1, suppliedRowid);
rc = sqlite3_step(p->stmtRowidsInsertRowid);
if (rc != SQLITE_DONE) {
if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY) {
// IMP: V17090_01160
vec0_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
p->tableName);
} else {
// IMP: V04679_21517
vec0_set_error(
&p->base, "Error inserting into rowid shadow table: %s",
sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
}
rc = SQLITE_ERROR;
goto complete2;
}
*rowid = suppliedRowid;
rc = SQLITE_OK;
complete2:
sqlite3_reset(p->stmtRowidsInsertRowid);
sqlite3_clear_bindings(p->stmtRowidsInsertRowid);
if (sqlite3_mutex_leave) {
sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
}
return rc;
}
// Option 2: User did not supply a rowid
if (sqlite3_value_type(idValue) != SQLITE_NULL) {
// IMP: V30855_14925
vec0_set_error(&p->base,
"Only integers are allows for primary key values on %s",
p->tableName);
return SQLITE_ERROR;
}
if (sqlite3_mutex_enter) {
sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
}
// no need to bind a value to ?1 here: needs to be NULL
// so we can get the next autoincremented rowid value.
rc = sqlite3_step(p->stmtRowidsInsertId);
if (rc != SQLITE_DONE) {
// IMP: V15177_32015
vec0_set_error(&p->base, "Error inserting into rowid shadow table: %s",
sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
rc = SQLITE_ERROR;
goto complete3;
}
*rowid = sqlite3_last_insert_rowid(p->db);
rc = SQLITE_OK;
complete3:
sqlite3_reset(p->stmtRowidsInsertId);
sqlite3_clear_bindings(p->stmtRowidsInsertId);
if (sqlite3_mutex_leave) {
sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
}
return rc;
}
/**
@ -4209,27 +4290,68 @@ int vec0Update_InsertNextAvailableStep(
i64 validitySize;
*chunk_offset = -1;
sqlite3_reset(p->stmtLatestChunk);
rc = sqlite3_step(p->stmtLatestChunk);
todo_assert(rc == SQLITE_ROW);
if (rc != SQLITE_ROW) {
// IMP: V31559_15629
vec0_set_error(&p->base, VEC_INTERAL_ERROR "Could not find latest chunk");
rc = SQLITE_ERROR;
goto cleanup;
}
*chunk_rowid = sqlite3_column_int64(p->stmtLatestChunk, 0);
rc = sqlite3_step(p->stmtLatestChunk);
todo_assert(rc == SQLITE_DONE);
if (rc != SQLITE_DONE) {
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"unknown result code when closing out stmtLatestChunk. "
"Please file an issue: " REPORT_URL,
p->schemaName, p->shadowChunksName);
goto cleanup;
}
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
*chunk_rowid, 1, blobChunksValidity);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
// IMP: V22053_06123
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"could not open validity blob on %s.%s.%lld",
p->schemaName, p->shadowChunksName, *chunk_rowid);
goto cleanup;
}
validitySize = sqlite3_blob_bytes(*blobChunksValidity);
todo_assert(validitySize == p->chunk_size / CHAR_BIT);
if (validitySize != p->chunk_size / CHAR_BIT) {
// IMP: V29362_13432
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"validity blob size mismatch on "
"%s.%s.%lld, expected %lld but received %lld.",
p->schemaName, p->shadowChunksName, *chunk_rowid,
p->chunk_size / CHAR_BIT, validitySize);
rc = SQLITE_ERROR;
goto cleanup;
}
*bufferChunksValidity = sqlite3_malloc(validitySize);
todo_assert(*bufferChunksValidity);
if (!(*bufferChunksValidity)) {
vec0_set_error(&p->base, VEC_INTERAL_ERROR
"Could not allocate memory for validity bitmap");
rc = SQLITE_NOMEM;
goto cleanup;
}
rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
validitySize, 0);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"Could not read validity bitmap for %s.%s.%lld",
p->schemaName, p->shadowChunksName, *chunk_rowid);
goto cleanup;
}
// find the next available offset, ie first `0` in the bitmap.
for (int i = 0; i < validitySize; i++) {
if ((*bufferChunksValidity)[i] == 0b11111111)
continue;
@ -4244,32 +4366,80 @@ int vec0Update_InsertNextAvailableStep(
done:
// latest chunk was full, so need to create a new one
if (*chunk_offset == -1) {
int rc = vec0_new_chunk(p, chunk_rowid);
assert(rc == SQLITE_OK);
rc = vec0_new_chunk(p, chunk_rowid);
if (rc != SQLITE_OK) {
// IMP: V08441_25279
vec0_set_error(&p->base,
VEC_INTERAL_ERROR "Could not insert a new vector chunk");
rc = SQLITE_ERROR; // otherwise raises a DatabaseError and not operational
// error?
goto cleanup;
}
*chunk_offset = 0;
// blobChunksValidity and bufferChunksValidity are stale, pointing to the
// previous (full) chunk, so release them and re-assign them below.
sqlite3_blob_close(*blobChunksValidity);
sqlite3_free((void *)*bufferChunksValidity);
*blobChunksValidity = NULL;
*bufferChunksValidity = NULL;
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName,
"validity", *chunk_rowid, 1, blobChunksValidity);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
vec0_set_error(
&p->base,
VEC_INTERAL_ERROR
"Could not open validity blob for newly created chunk %s.%s.%lld",
p->schemaName, p->shadowChunksName, *chunk_rowid);
goto cleanup;
}
validitySize = sqlite3_blob_bytes(*blobChunksValidity);
todo_assert(validitySize == p->chunk_size / CHAR_BIT);
if (validitySize != p->chunk_size / CHAR_BIT) {
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"validity blob size mismatch for newly created chunk "
"%s.%s.%lld. Exepcted %lld, got %lld",
p->schemaName, p->shadowChunksName, *chunk_rowid,
p->chunk_size / CHAR_BIT, validitySize);
goto cleanup;
}
*bufferChunksValidity = sqlite3_malloc(validitySize);
rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
validitySize, 0);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"could not read validity blob newly created chunk "
"%s.%s.%lld",
p->schemaName, p->shadowChunksName, *chunk_rowid);
goto cleanup;
}
}
return SQLITE_OK;
rc = SQLITE_OK;
cleanup:
sqlite3_reset(p->stmtLatestChunk);
return rc;
}
static int vec0Update_InsertWriteFinalStepVectors(
sqlite3_blob *blobVectors, const void *bVector, i64 chunk_offset,
size_t dimensions, enum VectorElementType element_type) {
/**
* @brief Write the vector data into the provided vector blob at the given
* offset
*
* @param blobVectors SQLite BLOB to write to
* @param chunk_offset the "offset" (ie validity bitmap position) to write the
* vector to
* @param bVector pointer to the vector containing data
* @param dimensions how many dimensions the vector has
* @param element_type the vector type
* @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise failure
*/
static int
vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset,
const void *bVector, size_t dimensions,
enum VectorElementType element_type) {
int n;
int offset;
@ -4288,9 +4458,7 @@ static int vec0Update_InsertWriteFinalStepVectors(
break;
}
int rc = sqlite3_blob_write(blobVectors, bVector, n, offset);
todo_assert(rc == SQLITE_OK);
return rc;
return sqlite3_blob_write(blobVectors, bVector, n, offset);
}
/**
@ -4312,7 +4480,6 @@ int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
sqlite3_blob *blobChunksValidity,
const unsigned char *bufferChunksValidity) {
int rc;
sqlite3_blob *blobChunksRowids;
// mark the validity bit for this row in the chunk's validity bitmap
// Get the byte offset of the bitmap
@ -4321,60 +4488,115 @@ int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
bx = bx | (1 << (chunk_offset % CHAR_BIT));
// write that 1 byte
rc = sqlite3_blob_write(blobChunksValidity, &bx, 1, chunk_offset / CHAR_BIT);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
vec0_set_error(&p->base, VEC_INTERAL_ERROR "could not mark validity bit ");
return rc;
}
// Go insert the vector data into the vector chunk shadow tables
for (int i = 0; i < p->numVectorColumns; i++) {
sqlite3_blob *blobVectors;
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
"vectors", chunk_rowid, 1, &blobVectors);
todo_assert(rc == SQLITE_OK);
switch (p->vector_columns[i].element_type) {
case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
todo_assert((unsigned long)sqlite3_blob_bytes(blobVectors) ==
p->chunk_size * p->vector_columns[i].dimensions *
sizeof(f32));
break;
case SQLITE_VEC_ELEMENT_TYPE_INT8:
todo_assert((unsigned long)sqlite3_blob_bytes(blobVectors) ==
p->chunk_size * p->vector_columns[i].dimensions * sizeof(i8));
break;
case SQLITE_VEC_ELEMENT_TYPE_BIT:
todo_assert((unsigned long)sqlite3_blob_bytes(blobVectors) ==
p->chunk_size * p->vector_columns[i].dimensions / CHAR_BIT);
break;
if (rc != SQLITE_OK) {
vec0_set_error(&p->base, "Error opening vector blob at %s.%s.%lld",
p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
goto cleanup;
}
rc = vec0Update_InsertWriteFinalStepVectors(
blobVectors, vectorDatas[i], chunk_offset,
i64 expected =
p->chunk_size * vector_column_byte_size(p->vector_columns[i]);
i64 actual = sqlite3_blob_bytes(blobVectors);
if (actual != expected) {
// IMP: V16386_00456
vec0_set_error(
&p->base,
VEC_INTERAL_ERROR
"vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid, expected,
actual);
rc = SQLITE_ERROR;
sqlite3_blob_close(blobVectors);
goto cleanup;
};
rc = vec0_write_vector_to_vector_blob(
blobVectors, chunk_offset, vectorDatas[i],
p->vector_columns[i].dimensions, p->vector_columns[i].element_type);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"could not write vector blob on %s.%s.%lld",
p->schemaName, p->shadowVectorChunksNames[i], chunk_rowid);
rc = SQLITE_ERROR;
sqlite3_blob_close(blobVectors);
goto cleanup;
}
sqlite3_blob_close(blobVectors);
}
sqlite3_blob *blobChunksRowids = NULL;
// write the new rowid to the rowids column of the _chunks table
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
chunk_rowid, 1, &blobChunksRowids);
todo_assert(rc == SQLITE_OK);
todo_assert(sqlite3_blob_bytes(blobChunksRowids) ==
p->chunk_size * sizeof(i64));
if (rc != SQLITE_OK) {
// IMP: V09221_26060
vec0_set_error(&p->base,
VEC_INTERAL_ERROR "could not open rowids blob on %s.%s.%lld",
p->schemaName, p->shadowChunksName, chunk_rowid);
goto cleanup;
}
i64 expected = p->chunk_size * sizeof(i64);
i64 actual = sqlite3_blob_bytes(blobChunksRowids);
if (expected != actual) {
// IMP: V12779_29618
vec0_set_error(
&p->base,
VEC_INTERAL_ERROR
"rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
p->schemaName, p->shadowChunksName, chunk_rowid, expected, actual);
rc = SQLITE_ERROR;
sqlite3_blob_close(blobChunksRowids);
goto cleanup;
}
rc = sqlite3_blob_write(blobChunksRowids, &rowid, sizeof(i64),
chunk_offset * sizeof(i64));
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
vec0_set_error(
&p->base, VEC_INTERAL_ERROR "could not write rowids blob on %s.%s.%lld",
p->schemaName, p->shadowChunksName, chunk_rowid);
rc = SQLITE_ERROR;
sqlite3_blob_close(blobChunksRowids);
goto cleanup;
}
sqlite3_blob_close(blobChunksRowids);
// Now with all the vectors inserted, go back and update the _rowids table
// with the new chunk_rowid/chunk_offset values
sqlite3_reset(p->stmtRowidsUpdatePosition);
sqlite3_clear_bindings(p->stmtRowidsUpdatePosition);
sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid);
sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset);
sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 3, rowid);
rc = sqlite3_step(p->stmtRowidsUpdatePosition);
todo_assert(rc == SQLITE_DONE);
if (rc != SQLITE_DONE) {
// IMP: V21925_05995
vec0_set_error(&p->base,
VEC_INTERAL_ERROR
"could not update rowids position for rowid=%lld, "
"chunk_rowid=%lld, chunk_offset=%lld",
rowid, chunk_rowid, chunk_offset);
rc = SQLITE_ERROR;
goto cleanup;
}
return SQLITE_OK;
rc = SQLITE_OK;
cleanup:
sqlite3_reset(p->stmtRowidsUpdatePosition);
sqlite3_clear_bindings(p->stmtRowidsUpdatePosition);
return rc;
}
/**
@ -4390,9 +4612,12 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
// Rowid for the inserted row, determined by the inserted ID + _rowids shadow
// table
i64 rowid;
// Array to hold the vector data of the inserted row. Individual elements will
// have a lifetime bound to the argv[..] values.
void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS];
// Array to hold cleanup functions for vectorDatas[]
vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS];
// Rowid of the chunk in the _chunks shadow table that the row will be a part
// of.
@ -4402,12 +4627,12 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
// a write-able blob of the validity column for the given chunk. Used to mark
// validity bit
sqlite3_blob *blobChunksValidity;
sqlite3_blob *blobChunksValidity = NULL;
// buffer for the validity column for the given chunk. TODO maybe not needed
// here?
const unsigned char *bufferChunksValidity;
const unsigned char *bufferChunksValidity = NULL;
int numReadVectors = 0;
vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS];
// read all the inserted vectors into vectorDatas, validate their lengths.
for (int i = 0; i < p->numVectorColumns; i++) {
sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_VECTORN_START + i];
@ -4415,61 +4640,96 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
char *pzError;
enum VectorElementType elementType;
int rc = vector_from_value(valueVector, &vectorDatas[i], &dimensions,
&elementType, &cleanups[i], &pzError);
todo_assert(rc == SQLITE_OK);
assert(elementType == p->vector_columns[i].element_type);
rc = vector_from_value(valueVector, &vectorDatas[i], &dimensions,
&elementType, &cleanups[i], &pzError);
if (rc != SQLITE_OK) {
// IMP: V06519_23358
vec0_set_error(
pVTab, "Inserted vector for the \"%.*s\" column is invalid: %s",
p->vector_columns[i].name_length, p->vector_columns[i].name, pzError);
rc = SQLITE_ERROR;
goto cleanup;
}
numReadVectors++;
if (elementType != p->vector_columns[i].element_type) {
// IMP: V08221_25059
vec0_set_error(
pVTab,
"Inserted vector for the \"%.*s\" column is expected to be of type "
"%s, but a %s vector was provided.",
p->vector_columns[i].name_length, p->vector_columns[i].name,
vector_subtype_name(p->vector_columns[i].element_type),
vector_subtype_name(elementType));
rc = SQLITE_ERROR;
goto cleanup;
}
if (dimensions != p->vector_columns[i].dimensions) {
sqlite3_free(pVTab->zErrMsg);
pVTab->zErrMsg = sqlite3_mprintf(
// IMP: V01145_17984
vec0_set_error(
pVTab,
"Dimension mismatch for inserted vector for the \"%.*s\" column. "
"Expected %d dimensions but received %d.",
p->vector_columns[i].name_length, p->vector_columns[i].name,
p->vector_columns[i].dimensions, dimensions);
return SQLITE_ERROR;
rc = SQLITE_ERROR;
goto cleanup;
}
}
// Cannot insert a value in the hidden "distance" column
if (sqlite3_value_type(argv[2 + vec0_column_distance_idx(p)]) !=
SQLITE_NULL) {
SET_VTAB_ERROR("TODO distance provided in INSERT operation.");
return SQLITE_ERROR;
// IMP: V24228_08298
vec0_set_error(pVTab,
"A value was provided for the hidden \"distance\" column.");
rc = SQLITE_ERROR;
goto cleanup;
}
// Cannot insert a value in the hidden "k" column
if (sqlite3_value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL) {
SET_VTAB_ERROR("TODO k provided in INSERT operation.");
return SQLITE_ERROR;
// TODO cleanups
// IMP: V11875_28713
vec0_set_error(pVTab, "A value was provided for the hidden \"k\" column.");
rc = SQLITE_ERROR;
goto cleanup;
}
// Step #1: Insert/get a rowid for this row, from the _rowids table.
rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID], &rowid);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
goto cleanup;
}
// Step #2: Find the next "available" position in the _chunks table for this
// row.
rc = vec0Update_InsertNextAvailableStep(p, &chunk_rowid, &chunk_offset,
&blobChunksValidity,
&bufferChunksValidity);
todo_assert(rc == SQLITE_OK);
if (rc != SQLITE_OK) {
goto cleanup;
}
// Step #3: With the next available chunk position, write out all the vectors
// to their specified location.
rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid,
vectorDatas, blobChunksValidity,
bufferChunksValidity);
todo_assert(rc == SQLITE_OK);
for (int i = 0; i < p->numVectorColumns; i++) {
cleanups[i](vectorDatas[i]);
if (rc != SQLITE_OK) {
goto cleanup;
}
*pRowid = rowid;
rc = SQLITE_OK;
cleanup:
for (int i = 0; i < numReadVectors; i++) {
cleanups[i](vectorDatas[i]);
}
sqlite3_blob_close(blobChunksValidity);
sqlite3_free((void *)bufferChunksValidity);
*pRowid = rowid;
return SQLITE_OK;
return rc;
}
int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite_int64 rowid) {
@ -4566,10 +4826,9 @@ int vec0Update_UpdateOnRowid(sqlite3_vtab *pVTab, int argc,
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
"vectors", chunk_id, 1, &blobVectors);
todo_assert(rc == SQLITE_OK);
// TODO rename this functions
rc = vec0Update_InsertWriteFinalStepVectors(
blobVectors, vector, chunk_offset, p->vector_columns[i].dimensions,
p->vector_columns[i].element_type);
rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector,
p->vector_columns[i].dimensions,
p->vector_columns[i].element_type);
todo_assert(rc == SQLITE_OK);
sqlite3_blob_close(blobVectors);
}

View file

@ -479,6 +479,7 @@ def test_vec_quantize_i8():
).fetchone()[0]
assert vec_quantize_i8() == 111
@pytest.mark.skip(reason="TODO")
def test_vec_quantize_binary():
vec_quantize_binary = lambda *args, input="?": db.execute(
@ -535,6 +536,232 @@ def test_vec0_updates():
}
]
db.execute("create virtual table t1 using vec0(aaa float[4], chunk_size=8)")
db.execute(
"create virtual table txt_pk using vec0( txt_id text primary key, aaa float[4])"
)
# EVIDENCE-OF: V06519_23358 vec0 INSERT validates vector
with _raises(
'Inserted vector for the "aaa" column is invalid: Input must have type BLOB (compact format) or TEXT (JSON)'
):
db.execute("insert into t1 values (1, ?)", [None])
# EVIDENCE-OF: V08221_25059 vec0 INSERT validates vector type
with _raises(
'Inserted vector for the "aaa" column is expected to be of type float32, but a bit vector was provided.'
):
db.execute("insert into t1 values (1, vec_bit(?))", [b"\xff\xff\xff\xff"])
# EVIDENCE-OF: V01145_17984 vec0 INSERT validates vector dimension match
with _raises(
'Dimension mismatch for inserted vector for the "aaa" column. Expected 4 dimensions but received 3.'
):
db.execute("insert into t1 values (1, ?)", ["[1,2,3]"])
# EVIDENCE-OF: V24228_08298 vec0 INSERT ensure no value provided for "distance" hidden column.
with _raises('A value was provided for the hidden "distance" column.'):
db.execute("insert into t1(rowid, aaa, distance) values (1, '[1,2,3,4]', 1)")
# EVIDENCE-OF: V11875_28713 vec0 INSERT ensure no value provided for "k" hidden column.
with _raises('A value was provided for the hidden "k" column.'):
db.execute("insert into t1(rowid, aaa, k) values (1, '[1,2,3,4]', 1)")
# EVIDENCE-OF: V17090_01160 vec0 INSERT duplicated int primary key raises uniqueness error
db.execute("insert into t1 values (1, '[1,1,1,1]')")
with _raises("UNIQUE constraint failed on t1 primary key"):
db.execute("insert into t1 values (1, '[2,2,2,2]')")
# simulate error on rowids shadow table
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_INSERT, "t1_rowids"))
# EVIDENCE-OF: V04679_21517 vec0 INSERT failed on _rowid shadow insert raises error
with _raises("Error inserting into rowid shadow table: not authorized"):
db.execute("insert into t1 values (2, '[2,2,2,2]')")
db.set_authorizer(None)
db.execute("insert into t1 values (2, '[2,2,2,2]')")
# test inserts where no rowid is provided
db.execute("insert into t1(aaa) values ('[3,3,3,3]')")
# EVIDENCE-OF: V30855_14925 vec0 INSERT non-integer/text primary key value raises error
with _raises("Only integers are allows for primary key values on t1"):
db.execute("insert into t1 values (1.2, '[4,4,4,4]')")
# simulate error on rowids shadow table, when rowid is not provided
# EVIDENCE-OF: V15177_32015 vec0 INSERT error on _rowids shadow insert raises error
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_INSERT, "t1_rowids"))
with _raises("Error inserting into rowid shadow table: not authorized"):
db.execute("insert into t1(aaa) values ('[2,2,2,2]')")
db.set_authorizer(None)
# EVIDENCE-OF: V31559_15629 vec0 INSERT error on _chunks shadow insert raises error
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_READ, "t1_chunks", "chunk_id"))
with _raises("Internal sqlite-vec error: Could not find latest chunk"):
db.execute("insert into t1 values (999, '[2,2,2,2]')")
db.set_authorizer(None)
# EVIDENCE-OF: V22053_06123 vec0 INSERT error on reading validity blob
db.commit()
db.execute("begin")
db.execute("ALTER TABLE t1_chunks DROP COLUMN validity")
with _raises(
"Internal sqlite-vec error: could not open validity blob on main.t1_chunks.1"
):
db.execute("insert into t1 values (9999, '[2,2,2,2]')")
db.rollback()
# EVIDENCE-OF: V29362_13432 vec0 INSERT validity blob size mismatch with chunk_size
db.commit()
db.execute("begin")
db.execute("UPDATE t1_chunks SET validity = zeroblob(101)")
with _raises(
"Internal sqlite-vec error: validity blob size mismatch on main.t1_chunks.1, expected 1 but received 101."
):
db.execute("insert into t1 values (9999, '[2,2,2,2]')")
db.rollback()
# EVIDENCE-OF: V16386_00456 vec0 INSERT validates vector blob column sizes
db.commit()
db.execute("begin")
db.execute("UPDATE t1_vector_chunks00 SET vectors = zeroblob(101)")
with _raises(
"Internal sqlite-vec error: vector blob size mismatch on main.t1_vector_chunks00.1. Expected 128, actual 101"
):
db.execute("insert into t1 values (9999, '[2,2,2,2]')")
db.rollback()
# EVIDENCE-OF: V09221_26060 vec0 INSERT rowids blob open error
db.commit()
db.execute("begin")
db.execute("ALTER TABLE t1_chunks DROP COLUMN rowids")
with _raises(
"Internal sqlite-vec error: could not open rowids blob on main.t1_chunks.1"
):
db.execute("insert into t1 values (9999, '[2,2,2,2]')")
db.rollback()
# EVIDENCE-OF: V12779_29618 vec0 INSERT rowids blob validates size
db.commit()
db.execute("begin")
db.execute("UPDATE t1_chunks SET rowids = zeroblob(101)")
with _raises(
"Internal sqlite-vec error: rowids blob size mismatch on main.t1_chunks.1. Expected 64, actual 101"
):
db.execute("insert into t1 values (9999, '[2,2,2,2]')")
db.rollback()
# EVIDENCE-OF: V21925_05995 vec0 INSERT error on "rowids update position" raises error
db.set_authorizer(
authorizer_deny_on(sqlite3.SQLITE_UPDATE, "t1_rowids", "chunk_id")
)
with _raises(
"Internal sqlite-vec error: could not update rowids position for rowid=9999, chunk_rowid=1, chunk_offset=3"
):
db.execute("insert into t1 values (9999, '[2,2,2,2]')")
db.set_authorizer(None)
########## testing inserts on text primary key tables ##########
# EVIDENCE-OF: V04200_21039 vec0 table with text primary key ensure text values
with _raises(
"The txt_pk virtual table was declared with a TEXT primary key, but a non-TEXT value was provided in an INSERT."
):
db.execute("insert into txt_pk(txt_id, aaa) values (1, '[1,2,3,4]')")
db.execute("insert into txt_pk(txt_id, aaa) values ('a', '[1,2,3,4]')")
# EVIDENCE-OF: V20497_04568 vec0 table with text primary key raises uniqueness error on duplicate values
with _raises("UNIQUE constraint failed on txt_pk primary key"):
db.execute("insert into txt_pk(txt_id, aaa) values ('a', '[5,6,7,8]')")
# EVIDENCE-OF: V24016_08086 vec0 table with text primary key raises error on rowid write error
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_INSERT, "txt_pk_rowids"))
with _raises("Error inserting into rowid shadow table: not authorized"):
db.execute("insert into txt_pk(txt_id, aaa) values ('b', '[2,2,2,2]')")
db.set_authorizer(None)
db.execute("insert into txt_pk(txt_id, aaa) values ('b', '[2,2,2,2]')")
def test_vec0_update_insert_errors2():
    """Check that INSERT reports an error when a new chunk cannot be created.

    The table uses chunk_size=8. Eight rows are inserted so the only chunk
    (chunk_id 1) becomes completely full, then INSERTs on the ``t1_chunks``
    shadow table are denied through an authorizer. The ninth insert needs a
    new chunk and must fail with the "Could not insert a new vector chunk"
    error.
    """
    db = connect(EXT_PATH)
    db.execute("create virtual table t1 using vec0(aaa float[4], chunk_size=8)")

    # Six rows: the chunk has 6 of its 8 slots marked valid.
    db.execute(
        """
        insert into t1(aaa) values
        ('[1,1,1,1]'),
        ('[2,1,1,1]'),
        ('[3,1,1,1]'),
        ('[4,1,1,1]'),
        ('[5,1,1,1]'),
        ('[6,1,1,1]')
        """
    )
    assert execute_all(db, "select * from t1_chunks") == [
        {
            "chunk_id": 1,
            "rowids": b"\x01\x00\x00\x00\x00\x00\x00\x00"
            + b"\x02\x00\x00\x00\x00\x00\x00\x00"
            + b"\x03\x00\x00\x00\x00\x00\x00\x00"
            + b"\x04\x00\x00\x00\x00\x00\x00\x00"
            + b"\x05\x00\x00\x00\x00\x00\x00\x00"
            + b"\x06\x00\x00\x00\x00\x00\x00\x00"
            + b"\x00\x00\x00\x00\x00\x00\x00\x00"
            + b"\x00\x00\x00\x00\x00\x00\x00\x00",
            "size": 8,
            "validity": b"?",  # 0b00111111
        }
    ]

    # Two more rows: the chunk is now full, still a single chunk row.
    db.execute(
        """
        insert into t1(aaa) values
        ('[7,1,1,1]'),
        ('[8,1,1,1]')
        """
    )
    assert execute_all(db, "select * from t1_chunks") == [
        {
            "chunk_id": 1,
            "rowids": b"\x01\x00\x00\x00\x00\x00\x00\x00"
            + b"\x02\x00\x00\x00\x00\x00\x00\x00"
            + b"\x03\x00\x00\x00\x00\x00\x00\x00"
            + b"\x04\x00\x00\x00\x00\x00\x00\x00"
            + b"\x05\x00\x00\x00\x00\x00\x00\x00"
            + b"\x06\x00\x00\x00\x00\x00\x00\x00"
            + b"\x07\x00\x00\x00\x00\x00\x00\x00"
            + b"\x08\x00\x00\x00\x00\x00\x00\x00",
            "size": 8,
            "validity": b"\xff",  # 0b11111111
        }
    ]

    # EVIDENCE-OF: V08441_25279 vec0 INSERT error on new chunk creation raises error
    db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_INSERT, "t1_chunks"))
    with _raises("Internal sqlite-vec error: Could not insert a new vector chunk"):
        db.execute("insert into t1(aaa) values ('[9,1,1,1]')")
    db.set_authorizer(None)
def authorizer_deny_on(operation, x1, x2=None):
    """Build an authorizer callback that denies exactly one kind of action.

    The returned callback takes the five authorizer arguments
    ``(op, p1, p2, p3, p4)`` and returns ``sqlite3.SQLITE_DENY`` when ``op``
    equals ``operation``, ``p1`` equals ``x1`` and ``p2`` equals ``x2``.
    Every other action gets ``sqlite3.SQLITE_OK``.

    Note that ``x2`` defaults to ``None``, so with the default the callback
    only matches actions whose second argument is ``None``.
    """

    def _auth(op, p1, p2, p3, p4):
        if op == operation and p1 == x1 and p2 == x2:
            return sqlite3.SQLITE_DENY
        return sqlite3.SQLITE_OK

    return _auth
def authorizer_debug(op, p1, p2, p3, p4):
    """Authorizer callback for debugging: print the action and allow it.

    Prints the five authorizer arguments to stdout and always returns
    ``sqlite3.SQLITE_OK``.
    """
    print(op, p1, p2, p3, p4)
    return sqlite3.SQLITE_OK
from contextlib import contextmanager
@contextmanager
def _raises(message):
    """Expect the wrapped code to raise ``sqlite3.OperationalError``.

    ``message`` is treated as literal text: it is passed through
    ``re.escape`` before being handed to ``pytest.raises(match=...)``, so
    regex metacharacters in the expected error message need no escaping by
    the caller.
    """
    with pytest.raises(sqlite3.OperationalError, match=re.escape(message)):
        yield
def test_vec_each():
vec_each_f32 = lambda *args: execute_all(