vec0 point and knn error handling

This commit is contained in:
Alex Garcia 2024-06-28 15:29:13 -07:00
parent 2fdd760dd1
commit a5525c9a5d
2 changed files with 366 additions and 131 deletions

View file

@ -3036,8 +3036,8 @@ int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
// TODO: test / evidence-of // TODO: test / evidence-of
sqlite3_bind_int64(pVtab->stmtRowidsGetChunkPosition, 1, rowid); sqlite3_bind_int64(pVtab->stmtRowidsGetChunkPosition, 1, rowid);
rc = sqlite3_step(pVtab->stmtRowidsGetChunkPosition); rc = sqlite3_step(pVtab->stmtRowidsGetChunkPosition);
if (rc == SQLITE_ROW) { if (rc != SQLITE_ROW) {
return SQLITE_ERROR; goto cleanup;
} }
sqlite3_value *value = sqlite3_value *value =
sqlite3_column_value(pVtab->stmtRowidsGetChunkPosition, 0); sqlite3_column_value(pVtab->stmtRowidsGetChunkPosition, 0);
@ -3050,7 +3050,44 @@ cleanup:
return rc; return rc;
} }
// TODO make sure callees use the return value of this function int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
sqlite3_stmt *stmt = NULL;
int rc;
char *zSql;
zSql = sqlite3_mprintf("SELECT rowid"
" FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE id = ?",
p->schemaName, p->tableName);
if (!zSql) {
rc = SQLITE_NOMEM;
goto cleanup;
}
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK) {
goto cleanup;
}
sqlite3_bind_value(stmt, 1, valueId);
rc = sqlite3_step(stmt);
if (rc == SQLITE_DONE) {
rc = SQLITE_EMPTY;
goto cleanup;
}
if (rc != SQLITE_ROW) {
goto cleanup;
}
*rowid = sqlite3_column_int64(stmt, 0);
rc = sqlite3_step(stmt);
if (rc != SQLITE_DONE) {
goto cleanup;
}
rc = SQLITE_OK;
cleanup:
sqlite3_finalize(stmt);
return rc;
}
int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) { int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
if (!p->pkIsText) { if (!p->pkIsText) {
sqlite3_result_int64(context, rowid); sqlite3_result_int64(context, rowid);
@ -3085,31 +3122,59 @@ int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx, int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx,
void **outVector, int *outVectorSize) { void **outVector, int *outVectorSize) {
int rc; int rc;
i64 chunk_id;
i64 chunk_offset;
size_t size;
void *buf = NULL;
int blobOffset;
assert((vector_column_idx >= 0) && assert((vector_column_idx >= 0) &&
(vector_column_idx < pVtab->numVectorColumns)); (vector_column_idx < pVtab->numVectorColumns));
sqlite3_bind_int64(pVtab->stmtRowidsGetChunkPosition, 1, rowid); sqlite3_bind_int64(pVtab->stmtRowidsGetChunkPosition, 1, rowid);
rc = sqlite3_step(pVtab->stmtRowidsGetChunkPosition); rc = sqlite3_step(pVtab->stmtRowidsGetChunkPosition);
if (rc == SQLITE_DONE) {
rc = SQLITE_EMPTY;
goto cleanup;
}
if (rc != SQLITE_ROW) { if (rc != SQLITE_ROW) {
vtab_set_error(&pVtab->base, "fuck"); // TODO vtab_set_error(&pVtab->base, "Could not find a row with id %lld", rowid);
rc = SQLITE_ERROR; rc = SQLITE_ERROR;
goto cleanup; goto cleanup;
} }
i64 chunk_id = sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 1); chunk_id = sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 1);
i64 chunk_offset = sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 2); chunk_offset = sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 2);
rc = sqlite3_blob_reopen(pVtab->vectorBlobs[vector_column_idx], chunk_id); rc = sqlite3_blob_reopen(pVtab->vectorBlobs[vector_column_idx], chunk_id);
todo_assert(rc == SQLITE_OK); if (rc != SQLITE_OK) {
size_t size = vtab_set_error(
vector_column_byte_size(pVtab->vector_columns[vector_column_idx]); &pVtab->base,
int blobOffset = chunk_offset * size; "Could not fetch vector data for %lld, reopening blob failed", rowid);
rc = SQLITE_ERROR;
goto cleanup;
}
size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]);
blobOffset = chunk_offset * size;
buf = sqlite3_malloc(size);
if (!buf) {
rc = SQLITE_ERROR;
goto cleanup;
}
void *buf = sqlite3_malloc(size);
todo_assert(buf);
rc = sqlite3_blob_read(pVtab->vectorBlobs[vector_column_idx], buf, size, rc = sqlite3_blob_read(pVtab->vectorBlobs[vector_column_idx], buf, size,
blobOffset); blobOffset);
todo_assert(rc == SQLITE_OK); if (rc != SQLITE_OK) {
sqlite3_free(buf);
buf = NULL;
vtab_set_error(
&pVtab->base,
"Could not fetch vector data for %lld, reading from blob failed",
rowid);
rc = SQLITE_ERROR;
goto cleanup;
}
*outVector = buf; *outVector = buf;
if (outVectorSize) { if (outVectorSize) {
@ -3273,15 +3338,15 @@ struct vec0_query_fullscan_data {
sqlite3_stmt *rowids_stmt; sqlite3_stmt *rowids_stmt;
i8 done; i8 done;
}; };
int vec0_query_fullscan_data_clear( void vec0_query_fullscan_data_clear(
struct vec0_query_fullscan_data *fullscan_data) { struct vec0_query_fullscan_data *fullscan_data) {
int rc; if (!fullscan_data)
return;
if (fullscan_data->rowids_stmt) { if (fullscan_data->rowids_stmt) {
rc = sqlite3_finalize(fullscan_data->rowids_stmt); sqlite3_finalize(fullscan_data->rowids_stmt);
todo_assert(rc == SQLITE_OK);
fullscan_data->rowids_stmt = NULL; fullscan_data->rowids_stmt = NULL;
} }
return SQLITE_OK;
} }
struct vec0_query_knn_data { struct vec0_query_knn_data {
@ -3292,7 +3357,10 @@ struct vec0_query_knn_data {
f32 *distances; f32 *distances;
i64 current_idx; i64 current_idx;
}; };
int vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) { void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
if (!knn_data)
return;
if (knn_data->rowids) { if (knn_data->rowids) {
sqlite3_free(knn_data->rowids); sqlite3_free(knn_data->rowids);
knn_data->rowids = NULL; knn_data->rowids = NULL;
@ -3301,7 +3369,6 @@ int vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
sqlite3_free(knn_data->distances); sqlite3_free(knn_data->distances);
knn_data->distances = NULL; knn_data->distances = NULL;
} }
return SQLITE_OK;
} }
struct vec0_query_point_data { struct vec0_query_point_data {
@ -3310,6 +3377,8 @@ struct vec0_query_point_data {
int done; int done;
}; };
void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) { void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) {
if (!point_data)
return;
for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) { for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
sqlite3_free(point_data->vectors[i]); sqlite3_free(point_data->vectors[i]);
point_data->vectors[i] = NULL; point_data->vectors[i] = NULL;
@ -3326,17 +3395,6 @@ struct vec0_cursor {
struct vec0_query_point_data *point_data; struct vec0_query_point_data *point_data;
}; };
#define SET_VTAB_ERROR(msg) \
do { \
sqlite3_free(pVTab->zErrMsg); \
pVTab->zErrMsg = sqlite3_mprintf("%s", msg); \
} while (0)
#define SET_VTAB_CURSOR_ERROR(msg) \
do { \
sqlite3_free(pVtabCursor->pVtab->zErrMsg); \
pVtabCursor->pVtab->zErrMsg = sqlite3_mprintf("%s", msg); \
} while (0)
#define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: " #define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: "
static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
sqlite3_vtab **ppVtab, char **pzErr, bool isCreate) { sqlite3_vtab **ppVtab, char **pzErr, bool isCreate) {
@ -3780,21 +3838,21 @@ static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
} }
static int vec0Close(sqlite3_vtab_cursor *cur) { static int vec0Close(sqlite3_vtab_cursor *cur) {
int rc;
vec0_cursor *pCur = (vec0_cursor *)cur; vec0_cursor *pCur = (vec0_cursor *)cur;
if (pCur->fullscan_data) { if (pCur->fullscan_data) {
rc = vec0_query_fullscan_data_clear(pCur->fullscan_data); vec0_query_fullscan_data_clear(pCur->fullscan_data);
todo_assert(rc == SQLITE_OK);
sqlite3_free(pCur->fullscan_data); sqlite3_free(pCur->fullscan_data);
pCur->fullscan_data = NULL;
} }
if (pCur->knn_data) { if (pCur->knn_data) {
rc = vec0_query_knn_data_clear(pCur->knn_data); vec0_query_knn_data_clear(pCur->knn_data);
todo_assert(rc == SQLITE_OK);
sqlite3_free(pCur->knn_data); sqlite3_free(pCur->knn_data);
pCur->knn_data = NULL;
} }
if (pCur->point_data) { if (pCur->point_data) {
vec0_query_point_data_clear(pCur->point_data); vec0_query_point_data_clear(pCur->point_data);
sqlite3_free(pCur->point_data); sqlite3_free(pCur->point_data);
pCur->point_data = NULL;
} }
sqlite3_free(pCur); sqlite3_free(pCur);
return SQLITE_OK; return SQLITE_OK;
@ -3849,7 +3907,8 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
if (op == SQLITE_INDEX_CONSTRAINT_MATCH && if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
vec0_column_idx_is_vector(p, iColumn)) { vec0_column_idx_is_vector(p, iColumn)) {
if (iMatchTerm > -1) { if (iMatchTerm > -1) {
// TODO only 1 match operator at a time vtab_set_error(
pVTab, "only 1 MATCH operator is allowed in a single vec0 query");
return SQLITE_ERROR; return SQLITE_ERROR;
} }
iMatchTerm = i; iMatchTerm = i;
@ -3860,7 +3919,11 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
} }
if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == VEC0_COLUMN_ID) { if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == VEC0_COLUMN_ID) {
if (vtabIn) { if (vtabIn) {
todo_assert(iRowidInTerm == -1); if (iRowidInTerm != -1) {
vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in "
"a single vec0 query");
return SQLITE_ERROR;
}
iRowidInTerm = i; iRowidInTerm = i;
} else { } else {
@ -3873,35 +3936,36 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
} }
if (iMatchTerm >= 0) { if (iMatchTerm >= 0) {
if (iLimitTerm < 0 && iKTerm < 0) { if (iLimitTerm < 0 && iKTerm < 0) {
// TODO: error, match on vector1 should require a limit for KNN. right? vtab_set_error(
pVTab,
"A LIMIT or 'k = ?' constraint is required on vec0 knn queries.");
return SQLITE_ERROR; return SQLITE_ERROR;
} }
if (iLimitTerm >= 0 && iKTerm >= 0) { if (iLimitTerm >= 0 && iKTerm >= 0) {
vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both");
return SQLITE_ERROR; return SQLITE_ERROR;
} }
if (pIdxInfo->nOrderBy < 1) {
// TODO error, `ORDER BY DISTANCE required if (pIdxInfo->nOrderBy) {
SET_VTAB_ERROR("ORDER BY distance required"); if (pIdxInfo->nOrderBy > 1) {
return SQLITE_CONSTRAINT; vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is "
} "allowed on vec0 KNN queries");
if (pIdxInfo->nOrderBy > 1) { return SQLITE_ERROR;
// TODO error, orderByConsumed is all or nothing, only 1 order by allowed }
SET_VTAB_ERROR("more than 1 ORDER BY clause provided"); if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) {
return SQLITE_CONSTRAINT; vtab_set_error(pVTab,
} "Only a single 'ORDER BY distance' clause is allowed on "
if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) { "vec0 KNN queries, not on other columns");
// TODO error, ORDER BY must be on column return SQLITE_ERROR;
SET_VTAB_ERROR("ORDER BY must be on the distance column"); }
return SQLITE_CONSTRAINT; if (pIdxInfo->aOrderBy[0].desc) {
} vtab_set_error(
if (pIdxInfo->aOrderBy[0].desc) { pVTab, "Only ascending in ORDER BY distance clause is supported, "
// TODO KNN should be ascending, is descending possible? "DESC is not supported yet.");
SET_VTAB_ERROR("Only ascending in ORDER BY distance clause is supported, " return SQLITE_ERROR;
"DESC is not supported yet."); }
return SQLITE_CONSTRAINT;
} }
pIdxInfo->orderByConsumed = 1;
pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1; pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = 1;
pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1; pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
if (iLimitTerm >= 0) { if (iLimitTerm >= 0) {
@ -4073,6 +4137,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
char *err; char *err;
rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType, rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType,
&cleanup, &err); &cleanup, &err);
todo_assert(rc == SQLITE_OK);
todo_assert(elementType == vector_column->element_type); todo_assert(elementType == vector_column->element_type);
todo_assert(dimensions == vector_column->dimensions); todo_assert(dimensions == vector_column->dimensions);
@ -4296,64 +4361,105 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
return SQLITE_OK; return SQLITE_OK;
} }
int vec0Filter_fullscan(vec0_cursor *pCur, vec0_vtab *p, int idxNum, int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
const char *idxStr, int argc, sqlite3_value **argv) {
UNUSED_PARAMETER(idxNum);
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(argc);
UNUSED_PARAMETER(argv);
int rc; int rc;
char *zSql; char *zSql;
struct vec0_query_fullscan_data *fullscan_data;
pCur->query_plan = SQLITE_VEC0_QUERYPLAN_FULLSCAN; fullscan_data = sqlite3_malloc(sizeof(*fullscan_data));
struct vec0_query_fullscan_data *fullscan_data =
sqlite3_malloc(sizeof(struct vec0_query_fullscan_data));
if (!fullscan_data) { if (!fullscan_data) {
return SQLITE_NOMEM; return SQLITE_NOMEM;
} }
memset(fullscan_data, 0, sizeof(struct vec0_query_fullscan_data)); memset(fullscan_data, 0, sizeof(*fullscan_data));
zSql = sqlite3_mprintf(" SELECT rowid " zSql = sqlite3_mprintf(" SELECT rowid "
" FROM " VEC0_SHADOW_ROWIDS_NAME " FROM " VEC0_SHADOW_ROWIDS_NAME
" ORDER by chunk_id, chunk_offset ", " ORDER by chunk_id, chunk_offset ",
p->schemaName, p->tableName); p->schemaName, p->tableName);
todo_assert(zSql); if (!zSql) {
rc = SQLITE_NOMEM;
goto error;
}
rc = sqlite3_prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL); rc = sqlite3_prepare_v2(p->db, zSql, -1, &fullscan_data->rowids_stmt, NULL);
sqlite3_free(zSql); sqlite3_free(zSql);
todo_assert(rc == SQLITE_OK); if (rc != SQLITE_OK) {
rc = sqlite3_step(fullscan_data->rowids_stmt); // IMP: V09901_26739
fullscan_data->done = rc == SQLITE_DONE; vtab_set_error(&p->base, "Error preparing rowid scan: %s",
if (!(rc == SQLITE_ROW || rc == SQLITE_DONE)) { sqlite3_errmsg(p->db));
vec0_query_fullscan_data_clear(fullscan_data); goto error;
return SQLITE_ERROR;
} }
rc = sqlite3_step(fullscan_data->rowids_stmt);
// DONE when there's no rowids, ROW when there are, both "success"
if (!(rc == SQLITE_ROW || rc == SQLITE_DONE)) {
goto error;
}
fullscan_data->done = rc == SQLITE_DONE;
pCur->query_plan = SQLITE_VEC0_QUERYPLAN_FULLSCAN;
pCur->fullscan_data = fullscan_data; pCur->fullscan_data = fullscan_data;
return SQLITE_OK; return SQLITE_OK;
error:
vec0_query_fullscan_data_clear(fullscan_data);
sqlite3_free(fullscan_data);
return rc;
} }
int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int idxNum, int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
const char *idxStr, int argc, sqlite3_value **argv) { sqlite3_value **argv) {
UNUSED_PARAMETER(idxNum);
UNUSED_PARAMETER(idxStr);
int rc; int rc;
assert(argc == 1); assert(argc == 1);
i64 rowid = sqlite3_value_int64(argv[0]); i64 rowid;
struct vec0_query_point_data *point_data = NULL;
pCur->query_plan = SQLITE_VEC0_QUERYPLAN_POINT; point_data = sqlite3_malloc(sizeof(*point_data));
struct vec0_query_point_data *point_data =
sqlite3_malloc(sizeof(struct vec0_query_point_data));
if (!point_data) { if (!point_data) {
return SQLITE_NOMEM; rc = SQLITE_NOMEM;
goto error;
}
memset(point_data, 0, sizeof(*point_data));
if (p->pkIsText) {
rc = vec0_rowid_from_id(p, argv[0], &rowid);
if (rc == SQLITE_EMPTY) {
goto eof;
}
if (rc != SQLITE_OK) {
goto error;
}
} else {
rowid = sqlite3_value_int64(argv[0]);
} }
memset(point_data, 0, sizeof(struct vec0_query_point_data));
for (int i = 0; i < p->numVectorColumns; i++) { for (int i = 0; i < p->numVectorColumns; i++) {
rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL); rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL);
assert(rc == SQLITE_OK); if (rc == SQLITE_EMPTY) {
goto eof;
}
if (rc != SQLITE_OK) {
goto error;
}
} }
point_data->rowid = rowid; point_data->rowid = rowid;
point_data->done = 0; point_data->done = 0;
pCur->point_data = point_data; pCur->point_data = point_data;
pCur->query_plan = SQLITE_VEC0_QUERYPLAN_POINT;
return SQLITE_OK; return SQLITE_OK;
eof:
point_data->rowid = rowid;
point_data->done = 1;
pCur->point_data = point_data;
pCur->query_plan = SQLITE_VEC0_QUERYPLAN_POINT;
return SQLITE_OK;
error:
vec0_query_point_data_clear(point_data);
sqlite3_free(point_data);
return rc;
} }
static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
@ -4361,23 +4467,26 @@ static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
vec0_cursor *pCur = (vec0_cursor *)pVtabCursor; vec0_cursor *pCur = (vec0_cursor *)pVtabCursor;
vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab; vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab;
if (strcmp(idxStr, VEC0_QUERY_PLAN_FULLSCAN) == 0) { if (strcmp(idxStr, VEC0_QUERY_PLAN_FULLSCAN) == 0) {
return vec0Filter_fullscan(pCur, p, idxNum, idxStr, argc, argv); return vec0Filter_fullscan(p, pCur);
} else if (strncmp(idxStr, "knn:", 4) == 0) { } else if (strncmp(idxStr, "knn:", 4) == 0) {
return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv); return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv);
} else if (strcmp(idxStr, VEC0_QUERY_PLAN_POINT) == 0) { } else if (strcmp(idxStr, VEC0_QUERY_PLAN_POINT) == 0) {
return vec0Filter_point(pCur, p, idxNum, idxStr, argc, argv); return vec0Filter_point(pCur, p, argc, argv);
} else { } else {
SET_VTAB_CURSOR_ERROR("unknown idxStr"); vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr);
return SQLITE_ERROR; return SQLITE_ERROR;
} }
} }
static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
UNUSED_PARAMETER(cur);
UNUSED_PARAMETER(pRowid);
vec0_cursor *pCur = (vec0_cursor *)cur; vec0_cursor *pCur = (vec0_cursor *)cur;
todo_assert(pCur->query_plan == SQLITE_VEC0_QUERYPLAN_POINT); if ((pCur->query_plan != SQLITE_VEC0_QUERYPLAN_POINT) ||
todo_assert(pCur->point_data); (!pCur->point_data)) {
vtab_set_error(
cur->pVtab,
"Internal sqlite-vec error: exepcted point query plan in vec0Rowid");
return SQLITE_ERROR;
}
*pRowid = pCur->point_data->rowid; *pRowid = pCur->point_data->rowid;
return SQLITE_OK; return SQLITE_OK;
} }
@ -4386,48 +4495,58 @@ static int vec0Next(sqlite3_vtab_cursor *cur) {
vec0_cursor *pCur = (vec0_cursor *)cur; vec0_cursor *pCur = (vec0_cursor *)cur;
switch (pCur->query_plan) { switch (pCur->query_plan) {
case SQLITE_VEC0_QUERYPLAN_FULLSCAN: { case SQLITE_VEC0_QUERYPLAN_FULLSCAN: {
todo_assert(pCur->fullscan_data); if (!pCur->fullscan_data) {
return SQLITE_ERROR;
}
int rc = sqlite3_step(pCur->fullscan_data->rowids_stmt); int rc = sqlite3_step(pCur->fullscan_data->rowids_stmt);
if (rc == SQLITE_DONE) { if (rc == SQLITE_DONE) {
pCur->fullscan_data->done = 1; pCur->fullscan_data->done = 1;
return SQLITE_OK; return SQLITE_OK;
} }
if (rc == SQLITE_ROW) { if (rc == SQLITE_ROW) {
// TODO error handle
return SQLITE_OK; return SQLITE_OK;
} }
return SQLITE_ERROR; return SQLITE_ERROR;
} }
case SQLITE_VEC0_QUERYPLAN_KNN: { case SQLITE_VEC0_QUERYPLAN_KNN: {
todo_assert(pCur->knn_data); if (!pCur->knn_data) {
return SQLITE_ERROR;
}
pCur->knn_data->current_idx++; pCur->knn_data->current_idx++;
return SQLITE_OK; return SQLITE_OK;
} }
case SQLITE_VEC0_QUERYPLAN_POINT: { case SQLITE_VEC0_QUERYPLAN_POINT: {
todo_assert(pCur->point_data); if (!pCur->point_data) {
return SQLITE_ERROR;
}
pCur->point_data->done = 1; pCur->point_data->done = 1;
return SQLITE_OK; return SQLITE_OK;
} }
default: {
todo("point next impl");
}
} }
return SQLITE_ERROR;
} }
static int vec0Eof(sqlite3_vtab_cursor *cur) { static int vec0Eof(sqlite3_vtab_cursor *cur) {
vec0_cursor *pCur = (vec0_cursor *)cur; vec0_cursor *pCur = (vec0_cursor *)cur;
switch (pCur->query_plan) { switch (pCur->query_plan) {
case SQLITE_VEC0_QUERYPLAN_FULLSCAN: { case SQLITE_VEC0_QUERYPLAN_FULLSCAN: {
todo_assert(pCur->fullscan_data); if (!pCur->fullscan_data) {
return 1;
}
return pCur->fullscan_data->done; return pCur->fullscan_data->done;
} }
case SQLITE_VEC0_QUERYPLAN_KNN: { case SQLITE_VEC0_QUERYPLAN_KNN: {
todo_assert(pCur->knn_data); if (!pCur->knn_data) {
return 1;
}
return (pCur->knn_data->current_idx >= pCur->knn_data->k) || return (pCur->knn_data->current_idx >= pCur->knn_data->k) ||
(pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX); (pCur->knn_data->distances[pCur->knn_data->current_idx] == FLT_MAX);
} }
case SQLITE_VEC0_QUERYPLAN_POINT: { case SQLITE_VEC0_QUERYPLAN_POINT: {
todo_assert(pCur->point_data); if (!pCur->point_data) {
return 1;
}
return pCur->point_data->done; return pCur->point_data->done;
} }
} }
@ -4435,21 +4554,26 @@ static int vec0Eof(sqlite3_vtab_cursor *cur) {
static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur, static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
sqlite3_context *context, int i) { sqlite3_context *context, int i) {
todo_assert(pCur->fullscan_data); if (!pCur->fullscan_data) {
sqlite3_result_error(
context, "Internal sqlite-vec error: fullscan_data is NULL.", -1);
return SQLITE_ERROR;
}
i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0); i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);
if (i == VEC0_COLUMN_ID) { if (i == VEC0_COLUMN_ID) {
vec0_result_id(pVtab, context, rowid); return vec0_result_id(pVtab, context, rowid);
} else if (vec0_column_idx_is_vector(pVtab, i)) { } else if (vec0_column_idx_is_vector(pVtab, i)) {
void *v; void *v;
int sz; int sz;
int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i); int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz); int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &v, &sz);
todo_assert(rc == SQLITE_OK); if (rc != SQLITE_OK) {
sqlite3_result_blob(context, v, sz, SQLITE_TRANSIENT); return rc;
}
sqlite3_result_blob(context, v, sz, sqlite3_free);
sqlite3_result_subtype(context, sqlite3_result_subtype(context,
pVtab->vector_columns[vector_idx].element_type); pVtab->vector_columns[vector_idx].element_type);
sqlite3_free(v);
} else if (i == vec0_column_distance_idx(pVtab)) { } else if (i == vec0_column_distance_idx(pVtab)) {
sqlite3_result_null(context); sqlite3_result_null(context);
} else { } else {
@ -4460,16 +4584,18 @@ static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur, static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
sqlite3_context *context, int i) { sqlite3_context *context, int i) {
todo_assert(pCur->point_data); if (!pCur->point_data) {
sqlite3_result_error(context,
"Internal sqlite-vec error: point_data is NULL.", -1);
return SQLITE_ERROR;
}
if (i == VEC0_COLUMN_ID) { if (i == VEC0_COLUMN_ID) {
vec0_result_id(pVtab, context, pCur->point_data->rowid); return vec0_result_id(pVtab, context, pCur->point_data->rowid);
return SQLITE_OK;
} }
if (i == vec0_column_distance_idx(pVtab)) { if (i == vec0_column_distance_idx(pVtab)) {
sqlite3_result_null(context); sqlite3_result_null(context);
return SQLITE_OK; return SQLITE_OK;
} }
// TODO only have 1st vector data
if (vec0_column_idx_is_vector(pVtab, i)) { if (vec0_column_idx_is_vector(pVtab, i)) {
if (sqlite3_vtab_nochange(context)) { if (sqlite3_vtab_nochange(context)) {
sqlite3_result_null(context); sqlite3_result_null(context);
@ -4490,11 +4616,14 @@ static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur, static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
sqlite3_context *context, int i) { sqlite3_context *context, int i) {
todo_assert(pCur->knn_data); if (!pCur->knn_data) {
sqlite3_result_error(context,
"Internal sqlite-vec error: knn_data is NULL.", -1);
return SQLITE_ERROR;
}
if (i == VEC0_COLUMN_ID) { if (i == VEC0_COLUMN_ID) {
i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
vec0_result_id(pVtab, context, rowid); return vec0_result_id(pVtab, context, rowid);
return SQLITE_OK;
} }
if (i == vec0_column_distance_idx(pVtab)) { if (i == vec0_column_distance_idx(pVtab)) {
sqlite3_result_double( sqlite3_result_double(
@ -5259,7 +5388,6 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite_int64 rowid) {
// 5. Delete value in _rowids table // 5. Delete value in _rowids table
// 1. get chunk_id and chunk_offset from _rowids // 1. get chunk_id and chunk_offset from _rowids
// TODO how to make this fail without failing the point query
rc = vec0_get_chunk_position(p, rowid, &chunk_id, &chunk_offset); rc = vec0_get_chunk_position(p, rowid, &chunk_id, &chunk_offset);
if (rc != SQLITE_OK) { if (rc != SQLITE_OK) {
return rc; return rc;
@ -5427,10 +5555,8 @@ static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
vtab_set_error(pVTab, vtab_set_error(pVTab,
"UPDATE operation on rowids with vec0 is not supported."); "UPDATE operation on rowids with vec0 is not supported.");
return SQLITE_ERROR; return SQLITE_ERROR;
} } else {
// unknown operation vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0.");
else {
SET_VTAB_ERROR("Unrecognized xUpdate operation provided for vec0.");
return SQLITE_ERROR; return SQLITE_ERROR;
} }
} }

View file

@ -16,6 +16,8 @@ EXT_PATH = "./dist/vec0"
SUPPORTS_SUBTYPE = sqlite3.sqlite_version_info[1] > 38 SUPPORTS_SUBTYPE = sqlite3.sqlite_version_info[1] > 38
SUPPORTS_DROP_COLUMN = sqlite3.sqlite_version_info[1] >= 35 SUPPORTS_DROP_COLUMN = sqlite3.sqlite_version_info[1] >= 35
SUPPORTS_VTAB_IN = sqlite3.sqlite_version_info[1] >= 38
SUPPORTS_VTAB_LIMIT = sqlite3.sqlite_version_info[1] >= 41
def bitmap_full(n: int) -> bytearray: def bitmap_full(n: int) -> bytearray:
@ -1133,38 +1135,138 @@ def test_vec0_updates():
# ] # ]
def test_vec0_point():
    """Point lookups on vec0 return one row for a known key and none otherwise.

    The same check is run against a table keyed by the implicit integer rowid
    and against a table declared with a TEXT primary key.
    """
    conn = connect(EXT_PATH)

    # Table keyed by rowid.
    conn.execute("CREATE VIRTUAL TABLE t USING vec0(a float[1], b float[1])")
    conn.execute(
        "INSERT INTO t VALUES (1, X'AABBCCDD', X'00112233'), (2, X'AABBCCDD', X'99887766');"
    )
    rowid_hit = execute_all(conn, "select * from t where rowid = 1")
    assert rowid_hit == [
        {
            "a": b"\xaa\xbb\xcc\xdd",
            "b": b'\x00\x11"3',
            "rowid": 1,
        }
    ]
    rowid_miss = execute_all(conn, "select * from t where rowid = 999")
    assert rowid_miss == []

    # Table keyed by a text primary key.
    conn.execute(
        "CREATE VIRTUAL TABLE t2 USING vec0(id text primary key, a float[1], b float[1])"
    )
    conn.execute(
        "INSERT INTO t2 VALUES ('A', X'AABBCCDD', X'00112233'), ('B', X'AABBCCDD', X'99887766');"
    )
    text_hit = execute_all(conn, "select * from t2 where id = 'A'")
    assert text_hit == [
        {
            "a": b"\xaa\xbb\xcc\xdd",
            "b": b'\x00\x11"3',
            "id": "A",
        }
    ]
    text_miss = execute_all(conn, "select * from t2 where id = 'xxx'")
    assert text_miss == []
def test_vec0_text_pk(): def test_vec0_text_pk():
db = connect(EXT_PATH) db = connect(EXT_PATH)
db.execute( db.execute(
""" """
create virtual table t using vec0( create virtual table t using vec0(
t_id text primary key, t_id text primary key,
aaa float[8], aaa float[1],
bbb float8[8] bbb float8[1]
); );
""" """
) )
assert execute_all(db, "select * from t") == []
with _raises(
"The t virtual table was declared with a TEXT primary key, but a non-TEXT value was provided in an INSERT."
):
db.execute("INSERT INTO t VALUES (1, X'AABBCCDD', X'AABBCCDD')")
db.executemany( db.executemany(
"INSERT INTO t VALUES (:t_id, :aaa, :bbb)", "INSERT INTO t VALUES (:t_id, :aaa, :bbb)",
[ [
{ {
"t_id": "t_1", "t_id": "t_1",
"aaa": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]", "aaa": "[.1]",
"bbb": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]", "bbb": "[-.1]",
}, },
{ {
"t_id": "t_2", "t_id": "t_2",
"aaa": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]", "aaa": "[.2]",
"bbb": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]", "bbb": "[-.2]",
}, },
{ {
"t_id": "t_3", "t_id": "t_3",
"aaa": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]", "aaa": "[.3]",
"bbb": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]", "bbb": "[-.3]",
}, },
], ],
) )
assert execute_all(db, "select * from t") == [] assert execute_all(db, "select t_id from t") == [
{"t_id": "t_1"},
{"t_id": "t_2"},
{"t_id": "t_3"},
]
assert execute_all(db, "select * from t") == [
{"t_id": "t_1", "aaa": _f32([0.1]), "bbb": _f32([-0.1])},
{"t_id": "t_2", "aaa": _f32([0.2]), "bbb": _f32([-0.2])},
{"t_id": "t_3", "aaa": _f32([0.3]), "bbb": _f32([-0.3])},
]
# EVIDENCE-OF: V09901_26739 vec0 full scan catches _rowid prep error
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_READ, "t_rowids", "rowid"))
with _raises(
"Error preparing rowid scan: access to t_rowids.rowid is prohibited",
sqlite3.DatabaseError,
):
db.execute("select * from t")
db.set_authorizer(None)
def test_vec0_best_index():
    """Malformed vec0 queries must each fail with the expected error message."""
    db = connect(EXT_PATH)
    db.execute(
        """
create virtual table t using vec0(
aaa float[1],
bbb float8[1]
);
"""
    )

    def expect_rejected(message, sql):
        # Run `sql` and require that it raises with `message`.
        with _raises(message):
            db.execute(sql)

    expect_rejected(
        "only 1 MATCH operator is allowed in a single vec0 query",
        "select * from t where aaa match NULL and bbb match NULL",
    )

    # Only checked when the SUPPORTS_VTAB_IN flag is set for this SQLite build.
    if SUPPORTS_VTAB_IN:
        expect_rejected(
            "only 1 'rowid in (..)' operator is allowed in a single vec0 query",
            "select * from t where rowid in(4,5,6) and rowid in (1, 2,3)",
        )

    expect_rejected(
        "A LIMIT or 'k = ?' constraint is required on vec0 knn queries.",
        "select * from t where aaa MATCH ?",
    )
    expect_rejected(
        "Only LIMIT or 'k =?' can be provided, not both",
        "select * from t where aaa MATCH ? and k = 10 limit 20",
    )
    expect_rejected(
        "Only a single 'ORDER BY distance' clause is allowed on vec0 KNN queries",
        "select * from t where aaa MATCH NULL and k = 10 order by distance, distance",
    )
    expect_rejected(
        "Only ascending in ORDER BY distance clause is supported, DESC is not supported yet.",
        "select * from t where aaa MATCH NULL and k = 10 order by distance desc",
    )
def authorizer_deny_on(operation, x1, x2=None): def authorizer_deny_on(operation, x1, x2=None):
@ -1610,6 +1712,13 @@ def test_smoke():
"select * from vec_xyz where a match X'' and k = 10 order by distance" "select * from vec_xyz where a match X'' and k = 10 order by distance"
), ),
) )
if SUPPORTS_VTAB_LIMIT:
assert re.match(
"SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:knn:",
explain_query_plan(
"select * from vec_xyz where a match X'' order by distance limit 10"
),
)
assert re.match( assert re.match(
"SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:fullscan", "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:fullscan",
explain_query_plan("select * from vec_xyz"), explain_query_plan("select * from vec_xyz"),