mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
fmt and SQLITE_VEC_OMIT_FS fixes
This commit is contained in:
parent
abf59c418e
commit
7ea402931e
2 changed files with 174 additions and 148 deletions
179
sqlite-vec.c
179
sqlite-vec.c
|
|
@ -1,4 +1,5 @@
|
||||||
#include "sqlite-vec.h"
|
#include "sqlite-vec.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
|
@ -7,10 +8,13 @@
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
|
#include <stdio.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "sqlite3ext.h"
|
#include "sqlite3ext.h"
|
||||||
SQLITE_EXTENSION_INIT1
|
SQLITE_EXTENSION_INIT1
|
||||||
|
|
||||||
|
|
@ -79,7 +83,8 @@ typedef size_t usize;
|
||||||
#define UNUSED_PARAMETER(X) (void)(X)
|
#define UNUSED_PARAMETER(X) (void)(X)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22) https://www.sqlite.org/changes.html#version_3_38_0
|
// sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22)
|
||||||
|
// https://www.sqlite.org/changes.html#version_3_38_0
|
||||||
#if SQLITE_VERSION_NUMBER >= 3038000
|
#if SQLITE_VERSION_NUMBER >= 3038000
|
||||||
#define COMPILER_SUPPORTS_VTAB_IN 1
|
#define COMPILER_SUPPORTS_VTAB_IN 1
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -93,7 +98,7 @@ typedef size_t usize;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SQLITE_INDEX_CONSTRAINT_LIMIT
|
#ifndef SQLITE_INDEX_CONSTRAINT_LIMIT
|
||||||
#define SQLITE_INDEX_CONSTRAINT_LIMIT 73
|
#define SQLITE_INDEX_CONSTRAINT_LIMIT 73
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define countof(x) (sizeof(x) / sizeof((x)[0]))
|
#define countof(x) (sizeof(x) / sizeof((x)[0]))
|
||||||
|
|
@ -505,9 +510,10 @@ static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#if !defined(__clang__) && \
|
#if !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
|
||||||
(defined(_M_ARM) || defined(_M_ARM64))
|
// From
|
||||||
// From https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c, line 34-43
|
// https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c,
|
||||||
|
// line 34-43
|
||||||
static unsigned int __builtin_popcountl(unsigned int x) {
|
static unsigned int __builtin_popcountl(unsigned int x) {
|
||||||
unsigned int c = 0;
|
unsigned int c = 0;
|
||||||
for (; x; ++c) {
|
for (; x; ++c) {
|
||||||
|
|
@ -1037,6 +1043,7 @@ struct VecNpyFile {
|
||||||
};
|
};
|
||||||
#define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
|
#define SQLITE_VEC_NPY_FILE_NAME "vec0-npy-file"
|
||||||
|
|
||||||
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
static void vec_npy_file(sqlite3_context *context, int argc,
|
static void vec_npy_file(sqlite3_context *context, int argc,
|
||||||
sqlite3_value **argv) {
|
sqlite3_value **argv) {
|
||||||
assert(argc == 1);
|
assert(argc == 1);
|
||||||
|
|
@ -1055,6 +1062,7 @@ static void vec_npy_file(sqlite3_context *context, int argc,
|
||||||
f->pathLength = pathLength;
|
f->pathLength = pathLength;
|
||||||
sqlite3_result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME, sqlite3_free);
|
sqlite3_result_pointer(context, f, SQLITE_VEC_NPY_FILE_NAME, sqlite3_free);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#pragma region scalar functions
|
#pragma region scalar functions
|
||||||
static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
|
static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
|
||||||
|
|
@ -2648,7 +2656,9 @@ struct vec_npy_each_cursor {
|
||||||
|
|
||||||
// Opened npy file, when reading from a file.
|
// Opened npy file, when reading from a file.
|
||||||
// fclose() when complete.
|
// fclose() when complete.
|
||||||
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
FILE *file;
|
FILE *file;
|
||||||
|
#endif
|
||||||
|
|
||||||
// an in-memory buffer containing a portion of the npy array.
|
// an in-memory buffer containing a portion of the npy array.
|
||||||
// Used for faster reading, instead of calling fread a lot.
|
// Used for faster reading, instead of calling fread a lot.
|
||||||
|
|
@ -2856,12 +2866,12 @@ static int vec_npy_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
|
||||||
|
|
||||||
static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) {
|
static int vec_npy_eachClose(sqlite3_vtab_cursor *cur) {
|
||||||
vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
|
vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)cur;
|
||||||
if (pCur->file) {
|
|
||||||
#ifndef SQLITE_VEC_OMIT_FS
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
|
if (pCur->file) {
|
||||||
fclose(pCur->file);
|
fclose(pCur->file);
|
||||||
#endif
|
|
||||||
pCur->file = NULL;
|
pCur->file = NULL;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (pCur->chunksBuffer) {
|
if (pCur->chunksBuffer) {
|
||||||
sqlite3_free(pCur->chunksBuffer);
|
sqlite3_free(pCur->chunksBuffer);
|
||||||
pCur->chunksBuffer = NULL;
|
pCur->chunksBuffer = NULL;
|
||||||
|
|
@ -2912,12 +2922,12 @@ static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
|
||||||
|
|
||||||
vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor;
|
vec_npy_each_cursor *pCur = (vec_npy_each_cursor *)pVtabCursor;
|
||||||
|
|
||||||
if (pCur->file) {
|
|
||||||
#ifndef SQLITE_VEC_OMIT_FS
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
|
if (pCur->file) {
|
||||||
fclose(pCur->file);
|
fclose(pCur->file);
|
||||||
#endif
|
|
||||||
pCur->file = NULL;
|
pCur->file = NULL;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (pCur->chunksBuffer) {
|
if (pCur->chunksBuffer) {
|
||||||
sqlite3_free(pCur->chunksBuffer);
|
sqlite3_free(pCur->chunksBuffer);
|
||||||
pCur->chunksBuffer = NULL;
|
pCur->chunksBuffer = NULL;
|
||||||
|
|
@ -2926,9 +2936,8 @@ static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
|
||||||
pCur->vector = NULL;
|
pCur->vector = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct VecNpyFile *f = NULL;
|
|
||||||
|
|
||||||
#ifndef SQLITE_VEC_OMIT_FS
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
|
struct VecNpyFile *f = NULL;
|
||||||
if ((f = sqlite3_value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME))) {
|
if ((f = sqlite3_value_pointer(argv[0], SQLITE_VEC_NPY_FILE_NAME))) {
|
||||||
FILE *file = fopen(f->path, "r");
|
FILE *file = fopen(f->path, "r");
|
||||||
if (!file) {
|
if (!file) {
|
||||||
|
|
@ -3293,12 +3302,26 @@ void vec0_free(vec0_vtab *p) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int vec0_column_distance_idx(vec0_vtab *pVtab) {
|
/**
|
||||||
return VEC0_COLUMN_VECTORN_START + (pVtab->numVectorColumns - 1) +
|
* @brief Returns the index of the distance hidden column for the given vec0
|
||||||
|
* table.
|
||||||
|
*
|
||||||
|
* @param p vec0 table
|
||||||
|
* @return int
|
||||||
|
*/
|
||||||
|
int vec0_column_distance_idx(vec0_vtab *p) {
|
||||||
|
return VEC0_COLUMN_VECTORN_START + (p->numVectorColumns - 1) +
|
||||||
VEC0_COLUMN_OFFSET_DISTANCE;
|
VEC0_COLUMN_OFFSET_DISTANCE;
|
||||||
}
|
}
|
||||||
int vec0_column_k_idx(vec0_vtab *pVtab) {
|
|
||||||
return VEC0_COLUMN_VECTORN_START + (pVtab->numVectorColumns - 1) +
|
/**
|
||||||
|
* @brief Returns the index of the k hidden column for the given vec0 table.
|
||||||
|
*
|
||||||
|
* @param p vec0 table
|
||||||
|
* @return int k column index
|
||||||
|
*/
|
||||||
|
int vec0_column_k_idx(vec0_vtab *p) {
|
||||||
|
return VEC0_COLUMN_VECTORN_START + (p->numVectorColumns - 1) +
|
||||||
VEC0_COLUMN_OFFSET_K;
|
VEC0_COLUMN_OFFSET_K;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3972,13 +3995,13 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
||||||
VEC0_MAX_VECTOR_COLUMNS);
|
VEC0_MAX_VECTOR_COLUMNS);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192
|
#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192
|
||||||
if(c.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS) {
|
if (c.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS) {
|
||||||
sqlite3_free(c.name);
|
sqlite3_free(c.name);
|
||||||
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
*pzErr = sqlite3_mprintf(
|
||||||
"Dimension on vector column too large, provided %lld, maximum %lld",
|
VEC_CONSTRUCTOR_ERROR
|
||||||
(i64) c.dimensions,
|
"Dimension on vector column too large, provided %lld, maximum %lld",
|
||||||
SQLITE_VEC_VEC0_MAX_DIMENSIONS);
|
(i64)c.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
memcpy(&pNew->vector_columns[numVectorColumns], &c, sizeof(c));
|
memcpy(&pNew->vector_columns[numVectorColumns], &c, sizeof(c));
|
||||||
|
|
@ -4032,10 +4055,10 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
|
||||||
"chunk_size must be divisible by 8");
|
"chunk_size must be divisible by 8");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
#define SQLITE_VEC_CHUNK_SIZE_MAX 4096
|
#define SQLITE_VEC_CHUNK_SIZE_MAX 4096
|
||||||
if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX) {
|
if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX) {
|
||||||
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
|
*pzErr =
|
||||||
"chunk_size too large");
|
sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "chunk_size too large");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -4345,11 +4368,11 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
||||||
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||||
u8 vtabIn = 0;
|
u8 vtabIn = 0;
|
||||||
|
|
||||||
#if COMPILER_SUPPORTS_VTAB_IN
|
#if COMPILER_SUPPORTS_VTAB_IN
|
||||||
if (sqlite3_libversion_number() >= 3038000) {
|
if (sqlite3_libversion_number() >= 3038000) {
|
||||||
vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
|
vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SQLITE_VEC_DEBUG
|
#ifdef SQLITE_VEC_DEBUG
|
||||||
printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i,
|
printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i,
|
||||||
|
|
@ -4437,7 +4460,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
||||||
sqlite3_str_appendall(idxStr, "knn:");
|
sqlite3_str_appendall(idxStr, "knn:");
|
||||||
#define VEC0_IDX_KNN_ROWID_IN 'I'
|
#define VEC0_IDX_KNN_ROWID_IN 'I'
|
||||||
|
|
||||||
#if COMPILER_SUPPORTS_VTAB_IN
|
#if COMPILER_SUPPORTS_VTAB_IN
|
||||||
if (iRowidInTerm >= 0) {
|
if (iRowidInTerm >= 0) {
|
||||||
// already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
|
// already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
|
||||||
// vtabIn == 1
|
// vtabIn == 1
|
||||||
|
|
@ -4446,7 +4469,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
|
||||||
pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = 3;
|
pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = 3;
|
||||||
pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
|
pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pIdxInfo->idxNum = iMatchVectorTerm;
|
pIdxInfo->idxNum = iMatchVectorTerm;
|
||||||
pIdxInfo->idxStr = sqlite3_str_finish(idxStr);
|
pIdxInfo->idxStr = sqlite3_str_finish(idxStr);
|
||||||
|
|
@ -4961,10 +4984,12 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
|
||||||
rc = SQLITE_ERROR;
|
rc = SQLITE_ERROR;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
#define SQLITE_VEC_VEC0_K_MAX 4096
|
#define SQLITE_VEC_VEC0_K_MAX 4096
|
||||||
if(k > SQLITE_VEC_VEC0_K_MAX) {
|
if (k > SQLITE_VEC_VEC0_K_MAX) {
|
||||||
vtab_set_error(
|
vtab_set_error(
|
||||||
&p->base, "k value in knn query too large, provided %lld and the limit is %lld", k, SQLITE_VEC_VEC0_K_MAX);
|
&p->base,
|
||||||
|
"k value in knn query too large, provided %lld and the limit is %lld",
|
||||||
|
k, SQLITE_VEC_VEC0_K_MAX);
|
||||||
rc = SQLITE_ERROR;
|
rc = SQLITE_ERROR;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
@ -4977,10 +5002,10 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle when a `rowid in (...)` operation was provided
|
// handle when a `rowid in (...)` operation was provided
|
||||||
// Array of all the rowids that appear in any `rowid in (...)` constraint.
|
// Array of all the rowids that appear in any `rowid in (...)` constraint.
|
||||||
// NULL if none were provided, which means a "full" scan.
|
// NULL if none were provided, which means a "full" scan.
|
||||||
#if COMPILER_SUPPORTS_VTAB_IN
|
#if COMPILER_SUPPORTS_VTAB_IN
|
||||||
if (argc > 2) {
|
if (argc > 2) {
|
||||||
sqlite3_value *item;
|
sqlite3_value *item;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
@ -5018,7 +5043,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
|
||||||
qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size,
|
qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size,
|
||||||
_cmp);
|
_cmp);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char *zSql;
|
char *zSql;
|
||||||
zSql = sqlite3_mprintf("select chunk_id, validity, rowids "
|
zSql = sqlite3_mprintf("select chunk_id, validity, rowids "
|
||||||
|
|
@ -5998,19 +6023,19 @@ cleanup:
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value * idValue) {
|
int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
|
||||||
vec0_vtab *p = (vec0_vtab *)pVTab;
|
vec0_vtab *p = (vec0_vtab *)pVTab;
|
||||||
int rc;
|
int rc;
|
||||||
i64 rowid;
|
i64 rowid;
|
||||||
i64 chunk_id;
|
i64 chunk_id;
|
||||||
i64 chunk_offset;
|
i64 chunk_offset;
|
||||||
|
|
||||||
if(p->pkIsText) {
|
if (p->pkIsText) {
|
||||||
rc = vec0_rowid_from_id(p, idValue, &rowid);
|
rc = vec0_rowid_from_id(p, idValue, &rowid);
|
||||||
if (rc != SQLITE_OK) {
|
if (rc != SQLITE_OK) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}else {
|
} else {
|
||||||
rowid = sqlite3_value_int64(idValue);
|
rowid = sqlite3_value_int64(idValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6031,9 +6056,11 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value * idValue) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. zero out rowid in chunks.rowids https://github.com/asg017/sqlite-vec/issues/54
|
// 3. zero out rowid in chunks.rowids
|
||||||
|
// https://github.com/asg017/sqlite-vec/issues/54
|
||||||
|
|
||||||
// 4. zero out any data in vector chunks tables https://github.com/asg017/sqlite-vec/issues/54
|
// 4. zero out any data in vector chunks tables
|
||||||
|
// https://github.com/asg017/sqlite-vec/issues/54
|
||||||
|
|
||||||
// 5. delete from _rowids table
|
// 5. delete from _rowids table
|
||||||
rc = vec0Update_Delete_DeleteRowids(p, rowid);
|
rc = vec0Update_Delete_DeleteRowids(p, rowid);
|
||||||
|
|
@ -6122,8 +6149,7 @@ cleanup:
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int vec0Update_Update(sqlite3_vtab *pVTab, int argc,
|
int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
|
||||||
sqlite3_value **argv) {
|
|
||||||
UNUSED_PARAMETER(argc);
|
UNUSED_PARAMETER(argc);
|
||||||
vec0_vtab *p = (vec0_vtab *)pVTab;
|
vec0_vtab *p = (vec0_vtab *)pVTab;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
@ -6131,22 +6157,21 @@ int vec0Update_Update(sqlite3_vtab *pVTab, int argc,
|
||||||
i64 chunk_offset;
|
i64 chunk_offset;
|
||||||
|
|
||||||
i64 rowid;
|
i64 rowid;
|
||||||
if(p->pkIsText) {
|
if (p->pkIsText) {
|
||||||
const char * a = (const char *) sqlite3_value_text(argv[0]);
|
const char *a = (const char *)sqlite3_value_text(argv[0]);
|
||||||
const char * b = (const char *) sqlite3_value_text(argv[1]);
|
const char *b = (const char *)sqlite3_value_text(argv[1]);
|
||||||
// IMP: V08886_25725
|
// IMP: V08886_25725
|
||||||
if(
|
if ((sqlite3_value_bytes(argv[0]) != sqlite3_value_bytes(argv[1])) ||
|
||||||
(sqlite3_value_bytes(argv[0]) != sqlite3_value_bytes(argv[1]))
|
strncmp(a, b, sqlite3_value_bytes(argv[0])) != 0) {
|
||||||
|| strncmp(a, b, sqlite3_value_bytes(argv[0])) != 0
|
vtab_set_error(pVTab,
|
||||||
) {
|
"UPDATEs on vec0 primary key values are not allowed.");
|
||||||
vtab_set_error(pVTab, "UPDATEs on vec0 primary key values are not allowed.");
|
return SQLITE_ERROR;
|
||||||
return SQLITE_ERROR;
|
|
||||||
}
|
}
|
||||||
rc = vec0_rowid_from_id(p, argv[0], &rowid);
|
rc = vec0_rowid_from_id(p, argv[0], &rowid);
|
||||||
if(rc != SQLITE_OK) {
|
if (rc != SQLITE_OK) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}else {
|
} else {
|
||||||
rowid = sqlite3_value_int64(argv[0]);
|
rowid = sqlite3_value_int64(argv[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6561,7 +6586,6 @@ void sbe_query_knn_data_clear(struct sbe_query_knn_data *knn_data) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor;
|
typedef struct vec_static_blob_entries_cursor vec_static_blob_entries_cursor;
|
||||||
struct vec_static_blob_entries_cursor {
|
struct vec_static_blob_entries_cursor {
|
||||||
sqlite3_vtab_cursor base;
|
sqlite3_vtab_cursor base;
|
||||||
|
|
@ -6768,7 +6792,6 @@ static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor,
|
||||||
|
|
||||||
size_t bsize = (p->blob->nvectors + 7) & ~7;
|
size_t bsize = (p->blob->nvectors + 7) & ~7;
|
||||||
|
|
||||||
|
|
||||||
i32 *topk_rowids = sqlite3_malloc(k * sizeof(i32));
|
i32 *topk_rowids = sqlite3_malloc(k * sizeof(i32));
|
||||||
if (!topk_rowids) {
|
if (!topk_rowids) {
|
||||||
// HANDLE https://github.com/asg017/sqlite-vec/issues/55
|
// HANDLE https://github.com/asg017/sqlite-vec/issues/55
|
||||||
|
|
@ -6786,14 +6809,14 @@ static int vec_static_blob_entriesFilter(sqlite3_vtab_cursor *pVtabCursor,
|
||||||
distances[i] =
|
distances[i] =
|
||||||
distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions);
|
distance_l2_sqr_float(v, (float *)queryVector, &p->blob->dimensions);
|
||||||
}
|
}
|
||||||
u8 * candidates = bitmap_new(bsize);
|
u8 *candidates = bitmap_new(bsize);
|
||||||
assert(candidates);
|
assert(candidates);
|
||||||
|
|
||||||
u8 * taken = bitmap_new(bsize);
|
u8 *taken = bitmap_new(bsize);
|
||||||
assert(taken);
|
assert(taken);
|
||||||
|
|
||||||
bitmap_fill(candidates, bsize);
|
bitmap_fill(candidates, bsize);
|
||||||
for(size_t i = bsize; i >= p->blob->nvectors; i--) {
|
for (size_t i = bsize; i >= p->blob->nvectors; i--) {
|
||||||
bitmap_set(candidates, i, 0);
|
bitmap_set(candidates, i, 0);
|
||||||
}
|
}
|
||||||
i32 k_used = 0;
|
i32 k_used = 0;
|
||||||
|
|
@ -6822,12 +6845,11 @@ static int vec_static_blob_entriesRowid(sqlite3_vtab_cursor *cur,
|
||||||
}
|
}
|
||||||
case VEC_SBE__QUERYPLAN_KNN: {
|
case VEC_SBE__QUERYPLAN_KNN: {
|
||||||
i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
|
i32 rowid = ((i32 *)pCur->knn_data->rowids)[pCur->knn_data->current_idx];
|
||||||
*pRowid = (sqlite3_int64) rowid;
|
*pRowid = (sqlite3_int64)rowid;
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) {
|
static int vec_static_blob_entriesNext(sqlite3_vtab_cursor *cur) {
|
||||||
|
|
@ -6887,7 +6909,8 @@ static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur,
|
||||||
sqlite3_result_blob(context,
|
sqlite3_result_blob(context,
|
||||||
((unsigned char *)p->blob->p) +
|
((unsigned char *)p->blob->p) +
|
||||||
(rowid * p->blob->dimensions * sizeof(float)),
|
(rowid * p->blob->dimensions * sizeof(float)),
|
||||||
p->blob->dimensions * sizeof(float), SQLITE_TRANSIENT);
|
p->blob->dimensions * sizeof(float),
|
||||||
|
SQLITE_TRANSIENT);
|
||||||
sqlite3_result_subtype(context, p->blob->element_type);
|
sqlite3_result_subtype(context, p->blob->element_type);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -6900,7 +6923,9 @@ static int vec_static_blob_entriesColumn(sqlite3_vtab_cursor *cur,
|
||||||
|
|
||||||
static sqlite3_module vec_static_blob_entriesModule = {
|
static sqlite3_module vec_static_blob_entriesModule = {
|
||||||
/* iVersion */ 3,
|
/* iVersion */ 3,
|
||||||
/* xCreate */ vec_static_blob_entriesCreate, // handle rm? https://github.com/asg017/sqlite-vec/issues/55
|
/* xCreate */
|
||||||
|
vec_static_blob_entriesCreate, // handle rm?
|
||||||
|
// https://github.com/asg017/sqlite-vec/issues/55
|
||||||
/* xConnect */ vec_static_blob_entriesConnect,
|
/* xConnect */ vec_static_blob_entriesConnect,
|
||||||
/* xBestIndex */ vec_static_blob_entriesBestIndex,
|
/* xBestIndex */ vec_static_blob_entriesBestIndex,
|
||||||
/* xDisconnect */ vec_static_blob_entriesDisconnect,
|
/* xDisconnect */ vec_static_blob_entriesDisconnect,
|
||||||
|
|
@ -6949,9 +6974,8 @@ static sqlite3_module vec_static_blob_entriesModule = {
|
||||||
"Commit: " SQLITE_VEC_SOURCE "\n" \
|
"Commit: " SQLITE_VEC_SOURCE "\n" \
|
||||||
"Build flags: " SQLITE_VEC_DEBUG_BUILD
|
"Build flags: " SQLITE_VEC_DEBUG_BUILD
|
||||||
|
|
||||||
|
|
||||||
SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||||
const sqlite3_api_routines *pApi) {
|
const sqlite3_api_routines *pApi) {
|
||||||
SQLITE_EXTENSION_INIT2(pApi);
|
SQLITE_EXTENSION_INIT2(pApi);
|
||||||
int rc = SQLITE_OK;
|
int rc = SQLITE_OK;
|
||||||
|
|
||||||
|
|
@ -7035,7 +7059,7 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||||
|
|
||||||
#ifndef SQLITE_VEC_OMIT_FS
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
||||||
const sqlite3_api_routines *pApi) {
|
const sqlite3_api_routines *pApi) {
|
||||||
UNUSED_PARAMETER(pzErrMsg);
|
UNUSED_PARAMETER(pzErrMsg);
|
||||||
SQLITE_EXTENSION_INIT2(pApi);
|
SQLITE_EXTENSION_INIT2(pApi);
|
||||||
int rc = SQLITE_OK;
|
int rc = SQLITE_OK;
|
||||||
|
|
@ -7045,9 +7069,9 @@ SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
SQLITE_VEC_API int
|
||||||
SQLITE_VEC_API int sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg,
|
sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg,
|
||||||
const sqlite3_api_routines *pApi) {
|
const sqlite3_api_routines *pApi) {
|
||||||
UNUSED_PARAMETER(pzErrMsg);
|
UNUSED_PARAMETER(pzErrMsg);
|
||||||
SQLITE_EXTENSION_INIT2(pApi);
|
SQLITE_EXTENSION_INIT2(pApi);
|
||||||
|
|
||||||
|
|
@ -7059,16 +7083,21 @@ SQLITE_VEC_API int sqlite3_vec_static_blobs_init(sqlite3 *db, char **pzErrMsg,
|
||||||
}
|
}
|
||||||
memset(static_blob_data, 0, sizeof(*static_blob_data));
|
memset(static_blob_data, 0, sizeof(*static_blob_data));
|
||||||
|
|
||||||
rc = sqlite3_create_function_v2(db, "vec_static_blob_from_raw", 4, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE,
|
rc = sqlite3_create_function_v2(
|
||||||
NULL, vec_static_blob_from_raw, NULL, NULL, NULL);
|
db, "vec_static_blob_from_raw", 4,
|
||||||
if(rc != SQLITE_OK) return rc;
|
DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, NULL,
|
||||||
|
vec_static_blob_from_raw, NULL, NULL, NULL);
|
||||||
|
if (rc != SQLITE_OK)
|
||||||
|
return rc;
|
||||||
|
|
||||||
rc = sqlite3_create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule,
|
rc = sqlite3_create_module_v2(db, "vec_static_blobs", &vec_static_blobsModule,
|
||||||
static_blob_data, sqlite3_free);
|
static_blob_data, sqlite3_free);
|
||||||
if(rc != SQLITE_OK) return rc;
|
if (rc != SQLITE_OK)
|
||||||
|
return rc;
|
||||||
rc = sqlite3_create_module_v2(db, "vec_static_blob_entries",
|
rc = sqlite3_create_module_v2(db, "vec_static_blob_entries",
|
||||||
&vec_static_blob_entriesModule,
|
&vec_static_blob_entriesModule,
|
||||||
static_blob_data, NULL);
|
static_blob_data, NULL);
|
||||||
if(rc != SQLITE_OK) return rc;
|
if (rc != SQLITE_OK)
|
||||||
|
return rc;
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2274,42 +2274,36 @@ def test_smoke():
|
||||||
|
|
||||||
db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
|
db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
|
||||||
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
||||||
assert (
|
assert chunk[
|
||||||
chunk["rowids"]
|
"rowids"
|
||||||
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
|
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + bytearray(
|
||||||
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
|
int(1024 * 8) - 8 * 2
|
||||||
+ bytearray(int(1024 * 8) - 8 * 2)
|
|
||||||
)
|
)
|
||||||
assert chunk["chunk_id"] == 1
|
assert chunk["chunk_id"] == 1
|
||||||
assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
|
assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
|
||||||
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
||||||
assert vchunk["rowid"] == 1
|
assert vchunk["rowid"] == 1
|
||||||
assert (
|
assert vchunk[
|
||||||
vchunk["vectors"]
|
"vectors"
|
||||||
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
|
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + bytearray(
|
||||||
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
|
int(1024 * 4 * 2) - (2 * 4 * 2)
|
||||||
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 2))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
|
db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
|
||||||
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
||||||
assert chunk["chunk_id"] == 1
|
assert chunk["chunk_id"] == 1
|
||||||
assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
|
assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
|
||||||
assert (
|
assert chunk[
|
||||||
chunk["rowids"]
|
"rowids"
|
||||||
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
|
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x03\x00\x00\x00\x00\x00\x00\x00" + bytearray(
|
||||||
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
|
int(1024 * 8) - 8 * 3
|
||||||
+ b"\x03\x00\x00\x00\x00\x00\x00\x00"
|
|
||||||
+ bytearray(int(1024 * 8) - 8 * 3)
|
|
||||||
)
|
)
|
||||||
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
||||||
assert vchunk["rowid"] == 1
|
assert vchunk["rowid"] == 1
|
||||||
assert (
|
assert vchunk[
|
||||||
vchunk["vectors"]
|
"vectors"
|
||||||
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
|
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + b"\x00\x00\x00\x00\x00\x00\x80\xbf" + bytearray(
|
||||||
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
|
int(1024 * 4 * 2) - (2 * 4 * 3)
|
||||||
+ b"\x00\x00\x00\x00\x00\x00\x80\xbf"
|
|
||||||
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 3))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# db.execute("select * from vec_xyz")
|
# db.execute("select * from vec_xyz")
|
||||||
|
|
@ -2352,63 +2346,66 @@ def test_vec0_stress_small_chunks():
|
||||||
{"rowid": 994, "a": _f32([99.4] * 8)},
|
{"rowid": 994, "a": _f32([99.4] * 8)},
|
||||||
{"rowid": 993, "a": _f32([99.3] * 8)},
|
{"rowid": 993, "a": _f32([99.3] * 8)},
|
||||||
]
|
]
|
||||||
assert execute_all(
|
assert (
|
||||||
db,
|
execute_all(
|
||||||
"""
|
db,
|
||||||
|
"""
|
||||||
select rowid, a, distance
|
select rowid, a, distance
|
||||||
from vec_small
|
from vec_small
|
||||||
where a match ?
|
where a match ?
|
||||||
and k = 9
|
and k = 9
|
||||||
order by distance
|
order by distance
|
||||||
""",
|
""",
|
||||||
[_f32([50.0] * 8)],
|
[_f32([50.0] * 8)],
|
||||||
) == [
|
)
|
||||||
{
|
== [
|
||||||
"a": _f32([500 * 0.1] * 8),
|
{
|
||||||
"distance": 0.0,
|
"a": _f32([500 * 0.1] * 8),
|
||||||
"rowid": 500,
|
"distance": 0.0,
|
||||||
},
|
"rowid": 500,
|
||||||
{
|
},
|
||||||
"a": _f32([501 * 0.1] * 8),
|
{
|
||||||
"distance": 0.2828384041786194,
|
"a": _f32([501 * 0.1] * 8),
|
||||||
"rowid": 501,
|
"distance": 0.2828384041786194,
|
||||||
},
|
"rowid": 501,
|
||||||
{
|
},
|
||||||
"a": _f32([499 * 0.1] * 8),
|
{
|
||||||
"distance": 0.2828384041786194,
|
"a": _f32([499 * 0.1] * 8),
|
||||||
"rowid": 499,
|
"distance": 0.2828384041786194,
|
||||||
},
|
"rowid": 499,
|
||||||
{
|
},
|
||||||
"a": _f32([502 * 0.1] * 8),
|
{
|
||||||
"distance": 0.5656875967979431,
|
"a": _f32([502 * 0.1] * 8),
|
||||||
"rowid": 502,
|
"distance": 0.5656875967979431,
|
||||||
},
|
"rowid": 502,
|
||||||
{
|
},
|
||||||
"a": _f32([498 * 0.1] * 8),
|
{
|
||||||
"distance": 0.5656875967979431,
|
"a": _f32([498 * 0.1] * 8),
|
||||||
"rowid": 498,
|
"distance": 0.5656875967979431,
|
||||||
},
|
"rowid": 498,
|
||||||
{
|
},
|
||||||
"a": _f32([503 * 0.1] * 8),
|
{
|
||||||
"distance": 0.8485260009765625,
|
"a": _f32([503 * 0.1] * 8),
|
||||||
"rowid": 503,
|
"distance": 0.8485260009765625,
|
||||||
},
|
"rowid": 503,
|
||||||
{
|
},
|
||||||
"a": _f32([497 * 0.1] * 8),
|
{
|
||||||
"distance": 0.8485260009765625,
|
"a": _f32([497 * 0.1] * 8),
|
||||||
"rowid": 497,
|
"distance": 0.8485260009765625,
|
||||||
},
|
"rowid": 497,
|
||||||
{
|
},
|
||||||
"a": _f32([496 * 0.1] * 8),
|
{
|
||||||
"distance": 1.1313751935958862,
|
"a": _f32([496 * 0.1] * 8),
|
||||||
"rowid": 496,
|
"distance": 1.1313751935958862,
|
||||||
},
|
"rowid": 496,
|
||||||
{
|
},
|
||||||
"a": _f32([504 * 0.1] * 8),
|
{
|
||||||
"distance": 1.1313751935958862,
|
"a": _f32([504 * 0.1] * 8),
|
||||||
"rowid": 504,
|
"distance": 1.1313751935958862,
|
||||||
},
|
"rowid": 504,
|
||||||
]
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_vec0_distance_metric():
|
def test_vec0_distance_metric():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue