mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Support constaints on distance column in KNN queries, for pagination and range queries (#166)
* Initial pass, needs tests+docs * old: test-knn-constraints * cleanup
This commit is contained in:
parent
a2dd24f27e
commit
611ca631ea
3 changed files with 492 additions and 2 deletions
139
sqlite-vec.c
139
sqlite-vec.c
|
|
@ -5305,11 +5305,21 @@ static int vec0Close(sqlite3_vtab_cursor *cur) {
|
|||
typedef enum {
|
||||
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
|
||||
|
||||
// ~~~ KNN QUERIES ~~~ //
|
||||
VEC0_IDXSTR_KIND_KNN_MATCH = '{',
|
||||
VEC0_IDXSTR_KIND_KNN_K = '}',
|
||||
VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
|
||||
// argv[i] is a constraint on a PARTITON KEY column in a KNN query
|
||||
//
|
||||
VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
|
||||
|
||||
// argv[i] is a constraint on the distance column in a KNN query
|
||||
VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT = '*',
|
||||
|
||||
// ~~~ POINT QUERIES ~~~ //
|
||||
VEC0_IDXSTR_KIND_POINT_ID = '!',
|
||||
|
||||
// ~~~ ??? ~~~ //
|
||||
VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
|
||||
} vec0_idxstr_kind;
|
||||
|
||||
|
|
@ -5318,11 +5328,22 @@ typedef enum {
|
|||
typedef enum {
|
||||
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
|
||||
|
||||
// Equality constraint on a PARTITON KEY column, ex `user_id = 123`
|
||||
VEC0_PARTITION_OPERATOR_EQ = 'a',
|
||||
|
||||
// "Greater than" constraint on a PARTITON KEY column, ex `year > 2024`
|
||||
VEC0_PARTITION_OPERATOR_GT = 'b',
|
||||
|
||||
// "Less than or equal to" constraint on a PARTITON KEY column, ex `year <= 2024`
|
||||
VEC0_PARTITION_OPERATOR_LE = 'c',
|
||||
|
||||
// "Less than" constraint on a PARTITON KEY column, ex `year < 2024`
|
||||
VEC0_PARTITION_OPERATOR_LT = 'd',
|
||||
|
||||
// "Greater than or equal to" constraint on a PARTITON KEY column, ex `year >= 2024`
|
||||
VEC0_PARTITION_OPERATOR_GE = 'e',
|
||||
|
||||
// "Not equal to" constraint on a PARTITON KEY column, ex `year != 2024`
|
||||
VEC0_PARTITION_OPERATOR_NE = 'f',
|
||||
} vec0_partition_operator;
|
||||
typedef enum {
|
||||
|
|
@ -5335,6 +5356,15 @@ typedef enum {
|
|||
VEC0_METADATA_OPERATOR_IN = 'g',
|
||||
} vec0_metadata_operator;
|
||||
|
||||
|
||||
typedef enum {
|
||||
|
||||
VEC0_DISTANCE_CONSTRAINT_GT = 'a',
|
||||
VEC0_DISTANCE_CONSTRAINT_GE = 'b',
|
||||
VEC0_DISTANCE_CONSTRAINT_LT = 'c',
|
||||
VEC0_DISTANCE_CONSTRAINT_LE = 'd',
|
||||
} vec0_distance_constraint_operator;
|
||||
|
||||
static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
||||
vec0_vtab *p = (vec0_vtab *)pVTab;
|
||||
/**
|
||||
|
|
@ -5494,6 +5524,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// find any PARTITION KEY column constraints
|
||||
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||
if (!pIdxInfo->aConstraint[i].usable)
|
||||
continue;
|
||||
|
|
@ -5548,6 +5579,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
|||
|
||||
}
|
||||
|
||||
// find any metadata column constraints
|
||||
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||
if (!pIdxInfo->aConstraint[i].usable)
|
||||
continue;
|
||||
|
|
@ -5644,6 +5676,58 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
|||
|
||||
}
|
||||
|
||||
// find any distance column constraints
|
||||
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||
if (!pIdxInfo->aConstraint[i].usable)
|
||||
continue;
|
||||
|
||||
int iColumn = pIdxInfo->aConstraint[i].iColumn;
|
||||
int op = pIdxInfo->aConstraint[i].op;
|
||||
if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
|
||||
continue;
|
||||
}
|
||||
if(vec0_column_distance_idx(p) != iColumn) {
|
||||
continue;
|
||||
}
|
||||
|
||||
char value = 0;
|
||||
switch(op) {
|
||||
case SQLITE_INDEX_CONSTRAINT_GT: {
|
||||
value = VEC0_DISTANCE_CONSTRAINT_GT;
|
||||
break;
|
||||
}
|
||||
case SQLITE_INDEX_CONSTRAINT_GE: {
|
||||
value = VEC0_DISTANCE_CONSTRAINT_GE;
|
||||
break;
|
||||
}
|
||||
case SQLITE_INDEX_CONSTRAINT_LT: {
|
||||
value = VEC0_DISTANCE_CONSTRAINT_LT;
|
||||
break;
|
||||
}
|
||||
case SQLITE_INDEX_CONSTRAINT_LE: {
|
||||
value = VEC0_DISTANCE_CONSTRAINT_LE;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// IMP TODO
|
||||
rc = SQLITE_ERROR;
|
||||
vtab_set_error(
|
||||
pVTab,
|
||||
"Illegal WHERE constraint on distance column in a KNN query. "
|
||||
"Only one of GT, GE, LT, LE constraints are allowed."
|
||||
);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
|
||||
pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT);
|
||||
sqlite3_str_appendchar(idxStr, 1, value);
|
||||
sqlite3_str_appendchar(idxStr, 1, '_');
|
||||
sqlite3_str_appendchar(idxStr, 1, '_');
|
||||
}
|
||||
|
||||
|
||||
|
||||
pIdxInfo->idxNum = iMatchVectorTerm;
|
||||
|
|
@ -5672,7 +5756,6 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
|||
}
|
||||
pIdxInfo->needToFreeIdxStr = 1;
|
||||
|
||||
|
||||
rc = SQLITE_OK;
|
||||
|
||||
done:
|
||||
|
|
@ -6560,12 +6643,15 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
|
|||
int numValueEntries = (idxStrLength-1) / 4;
|
||||
assert(numValueEntries == argc);
|
||||
int hasMetadataFilters = 0;
|
||||
int hasDistanceConstraints = 0;
|
||||
for(int i = 0; i < argc; i++) {
|
||||
int idx = 1 + (i * 4);
|
||||
char kind = idxStr[idx + 0];
|
||||
if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
|
||||
hasMetadataFilters = 1;
|
||||
break;
|
||||
}
|
||||
else if(kind == VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
|
||||
hasDistanceConstraints = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -6752,6 +6838,55 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
|
|||
chunk_distances[i] = result;
|
||||
}
|
||||
|
||||
if(hasDistanceConstraints) {
|
||||
for(int i = 0; i < argc; i++) {
|
||||
int idx = 1 + (i * 4);
|
||||
char kind = idxStr[idx + 0];
|
||||
// TODO casts f64 to f32, is that a problem?
|
||||
f32 target = (f32) sqlite3_value_double(argv[i]);
|
||||
|
||||
if(kind != VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
|
||||
continue;
|
||||
}
|
||||
vec0_distance_constraint_operator op = idxStr[idx + 1];
|
||||
|
||||
switch(op) {
|
||||
case VEC0_DISTANCE_CONSTRAINT_GE: {
|
||||
for(int i = 0; i < p->chunk_size;i++) {
|
||||
if(bitmap_get(b, i) && !(chunk_distances[i] >= target)) {
|
||||
bitmap_set(b, i, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VEC0_DISTANCE_CONSTRAINT_GT: {
|
||||
for(int i = 0; i < p->chunk_size;i++) {
|
||||
if(bitmap_get(b, i) && !(chunk_distances[i] > target)) {
|
||||
bitmap_set(b, i, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VEC0_DISTANCE_CONSTRAINT_LE: {
|
||||
for(int i = 0; i < p->chunk_size;i++) {
|
||||
if(bitmap_get(b, i) && !(chunk_distances[i] <= target)) {
|
||||
bitmap_set(b, i, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VEC0_DISTANCE_CONSTRAINT_LT: {
|
||||
for(int i = 0; i < p->chunk_size;i++) {
|
||||
if(bitmap_get(b, i) && !(chunk_distances[i] < target)) {
|
||||
bitmap_set(b, i, 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int used1;
|
||||
min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
|
||||
min(k, p->chunk_size), bTaken, &used1);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue