From 29507aa45dc1773280380c4a56f81bfb91c4a973 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Sat, 20 Apr 2024 17:02:19 -0700 Subject: [PATCH] fmt + better typedef common types --- sqlite-vec.c | 334 ++++++++++++++++++++--------------------- tests/test-loadable.py | 8 +- 2 files changed, 169 insertions(+), 173 deletions(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index f70fd0e..04033d9 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -1,56 +1,55 @@ +#include "sqlite-vec.h" #include #include +#include #include #include #include +#include #include #include #include -#include -#include -#include "sqlite-vec.h" #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 - #ifndef UINT32_TYPE -# ifdef HAVE_UINT32_T -# define UINT32_TYPE uint32_t -# else -# define UINT32_TYPE unsigned int -# endif +#ifdef HAVE_UINT32_T +#define UINT32_TYPE uint32_t +#else +#define UINT32_TYPE unsigned int +#endif #endif #ifndef UINT16_TYPE -# ifdef HAVE_UINT16_T -# define UINT16_TYPE uint16_t -# else -# define UINT16_TYPE unsigned short int -# endif +#ifdef HAVE_UINT16_T +#define UINT16_TYPE uint16_t +#else +#define UINT16_TYPE unsigned short int +#endif #endif #ifndef INT16_TYPE -# ifdef HAVE_INT16_T -# define INT16_TYPE int16_t -# else -# define INT16_TYPE short int -# endif +#ifdef HAVE_INT16_T +#define INT16_TYPE int16_t +#else +#define INT16_TYPE short int +#endif #endif #ifndef UINT8_TYPE -# ifdef HAVE_UINT8_T -# define UINT8_TYPE uint8_t -# else -# define UINT8_TYPE unsigned char -# endif +#ifdef HAVE_UINT8_T +#define UINT8_TYPE uint8_t +#else +#define UINT8_TYPE unsigned char +#endif #endif #ifndef INT8_TYPE -# ifdef HAVE_INT8_T -# define INT8_TYPE int8_t -# else -# define INT8_TYPE signed char -# endif +#ifdef HAVE_INT8_T +#define INT8_TYPE int8_t +#else +#define INT8_TYPE signed char +#endif #endif #ifndef LONGDOUBLE_TYPE -# define LONGDOUBLE_TYPE long double +#define LONGDOUBLE_TYPE long double #endif #ifndef _WIN32 @@ -59,6 +58,15 @@ typedef u_int16_t uint16_t; typedef u_int64_t uint64_t; #endif +typedef int8_t i8; +typedef uint8_t u8; +typedef int32_t i32; +typedef sqlite3_int64 i64; +typedef uint32_t u32; +typedef uint64_t u64; +typedef float f32; +typedef size_t usize; + #ifndef UNUSED_PARAMETER #define UNUSED_PARAMETER(X) (void)(X) #endif @@ -194,8 +202,8 @@ static float l2_sqr_float(const void *pVect1v, const void *pVect2v, } static float l2_sqr_int8(const void *pA, const void *pB, const void *pD) { - int8_t *a = (int8_t *)pA; - int8_t *b = (int8_t *)pB; + i8 *a = (i8 *)pA; + i8 *b = (i8 *)pB; size_t d = *((size_t *)pD); float res = 0; @@ -247,8 +255,8 @@ static float distance_cosine_float(const void *pVect1v, const void *pVect2v, } static float distance_cosine_int8(const void *pA, const void *pB, const void *pD) { - int8_t *a = (int8_t *)pA; - int8_t *b = (int8_t *)pB; + i8 *a = (i8 *)pA; + i8 *b = (i8 *)pB; size_t d = *((size_t *)pD); float dot = 0; @@ -265,7 +273,7 @@ static float distance_cosine_int8(const void *pA, const void *pB, } // https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34 -static uint8_t hamdist_table[256] = { +static u8 hamdist_table[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, @@ -278,14 +286,14 @@ static uint8_t hamdist_table[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -static float distance_hamming_u8(uint8_t *a, uint8_t *b, size_t n) { +static float distance_hamming_u8(u8 *a, u8 *b, size_t n) { int same = 0; for (unsigned long i = 0; i < n; i++) { same += hamdist_table[a[i] ^ b[i]]; } return (float)same; } -static float distance_hamming_u64(uint64_t *a, uint64_t *b, size_t n) { +static float distance_hamming_u64(u64 *a, u64 *b, size_t n) { int same = 0; for (unsigned long i = 0; i < n; i++) { same += __builtin_popcountl(a[i] ^ b[i]); @@ -298,10 +306,9 @@ static float distance_hamming(const void *a, const void *b, const void *d) { todo_assert((dimensions % CHAR_BIT) == 0); if ((dimensions % 64) == 0) { - return distance_hamming_u64((uint64_t *)a, (uint64_t *)b, - dimensions / 8 / CHAR_BIT); + return distance_hamming_u64((u64 *)a, (u64 *)b, dimensions / 8 / CHAR_BIT); } - return distance_hamming_u8((uint8_t *)a, (uint8_t *)b, dimensions / CHAR_BIT); + return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT); } // from SQLite source: @@ -493,7 +500,7 @@ static int fvec_from_value(sqlite3_value *value, float **vector, return SQLITE_ERROR; } -static int bitvec_from_value(sqlite3_value *value, uint8_t **vector, +static int bitvec_from_value(sqlite3_value *value, u8 **vector, size_t *dimensions, vector_cleanup *cleanup, char **pzErr) { int value_type = sqlite3_value_type(value); @@ -504,7 +511,7 @@ static int bitvec_from_value(sqlite3_value *value, uint8_t **vector, *pzErr = sqlite3_mprintf("zero-length vectors are not supported."); return SQLITE_ERROR; } - *vector = (uint8_t *)blob; + *vector = (u8 *)blob; *dimensions = bytes * CHAR_BIT; *cleanup = vector_cleanup_noop; return SQLITE_OK; @@ -513,7 +520,7 @@ static int bitvec_from_value(sqlite3_value *value, uint8_t **vector, return SQLITE_ERROR; } -static int int8_vec_from_value(sqlite3_value *value, int8_t **vector, +static int int8_vec_from_value(sqlite3_value *value, i8 **vector, size_t *dimensions, vector_cleanup *cleanup, char **pzErr) { int value_type = sqlite3_value_type(value); @@ -524,7 +531,7 @@ static int int8_vec_from_value(sqlite3_value *value, int8_t **vector, *pzErr = sqlite3_mprintf("zero-length vectors are not supported."); return SQLITE_ERROR; } - *vector = (int8_t *)blob; + *vector = (i8 *)blob; *dimensions = bytes; *cleanup = vector_cleanup_noop; return SQLITE_OK; @@ -560,7 +567,7 @@ int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions, } if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) { - int rc = bitvec_from_value(value, (uint8_t **)vector, dimensions, cleanup, + int rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup, pzErrorMessage); if (rc == SQLITE_OK) { *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT; @@ -568,7 +575,7 @@ int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions, return rc; } if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) { - int rc = int8_vec_from_value(value, (int8_t **)vector, dimensions, cleanup, + int rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup, pzErrorMessage); if (rc == SQLITE_OK) { *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8; @@ -640,9 +647,7 @@ int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a, return SQLITE_OK; } -int _cmp(const void *a, const void *b) { - return (*(sqlite3_int64 *)a - *(sqlite3_int64 *)b); -} +int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); } struct VecNpyFile { char *path; @@ -682,7 +687,7 @@ static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) { static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) { todo_assert(argc == 1); int rc; - uint8_t *vector; + u8 *vector; size_t dimensions; vector_cleanup cleanup; char *errmsg; @@ -699,7 +704,7 @@ static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) { static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) { todo_assert(argc == 1); int rc; - int8_t *vector; + i8 *vector; size_t dimensions; vector_cleanup cleanup; char *errmsg; @@ -866,7 +871,7 @@ static void vec_quantize_i8(sqlite3_context *context, int argc, char *err; int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &cleanup, &err); assert(rc == SQLITE_OK); - int8_t *out = sqlite3_malloc(dimensions * sizeof(int8_t)); + i8 *out = sqlite3_malloc(dimensions * sizeof(i8)); assert(out); if (argc == 2) { @@ -895,7 +900,7 @@ static void vec_quantize_i8(sqlite3_context *context, int argc, } cleanup(srcVector); - sqlite3_result_blob(context, out, dimensions * sizeof(int8_t), sqlite3_free); + sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free); sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); return; } @@ -917,7 +922,7 @@ static void vec_quantize_binary(sqlite3_context *context, int argc, } if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) { - uint8_t *out = sqlite3_malloc(dimensions / CHAR_BIT); + u8 *out = sqlite3_malloc(dimensions / CHAR_BIT); todo_assert(out); for (size_t i = 0; i < dimensions; i++) { int res = ((float *)vector)[i] > 0.0; @@ -926,10 +931,10 @@ static void vec_quantize_binary(sqlite3_context *context, int argc, sqlite3_result_blob(context, out, dimensions / CHAR_BIT, sqlite3_free); sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) { - uint8_t *out = sqlite3_malloc(dimensions / CHAR_BIT); + u8 *out = sqlite3_malloc(dimensions / CHAR_BIT); todo_assert(out); for (size_t i = 0; i < dimensions; i++) { - int res = ((int8_t *)vector)[i] > 0; + int res = ((i8 *)vector)[i] > 0; out[i / 8] |= (res << (i % 8)); } sqlite3_result_blob(context, out, dimensions / CHAR_BIT, sqlite3_free); @@ -975,14 +980,14 @@ static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) { goto finish; } case SQLITE_VEC_ELEMENT_TYPE_INT8: { - size_t outSize = dimensions * sizeof(int8_t); - int8_t *out = sqlite3_malloc(outSize); + size_t outSize = dimensions * sizeof(i8); + i8 *out = sqlite3_malloc(outSize); if (!out) { sqlite3_result_error_nomem(context); goto finish; } for (size_t i = 0; i < dimensions; i++) { - out[i] = ((int8_t *)a)[i] + ((int8_t *)b)[i]; + out[i] = ((i8 *)a)[i] + ((i8 *)b)[i]; } sqlite3_result_blob(context, out, outSize, sqlite3_free); sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); @@ -1031,14 +1036,14 @@ static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) { goto finish; } case SQLITE_VEC_ELEMENT_TYPE_INT8: { - size_t outSize = dimensions * sizeof(int8_t); - int8_t *out = sqlite3_malloc(outSize); + size_t outSize = dimensions * sizeof(i8); + i8 *out = sqlite3_malloc(outSize); if (!out) { sqlite3_result_error_nomem(context); goto finish; } for (size_t i = 0; i < dimensions; i++) { - out[i] = ((int8_t *)a)[i] - ((int8_t *)b)[i]; + out[i] = ((i8 *)a)[i] - ((i8 *)b)[i]; } sqlite3_result_blob(context, out, outSize, sqlite3_free); sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); @@ -1115,15 +1120,15 @@ static void vec_slice(sqlite3_context *context, int argc, goto done; } case SQLITE_VEC_ELEMENT_TYPE_INT8: { - int8_t *out = sqlite3_malloc(n * sizeof(int8_t)); + i8 *out = sqlite3_malloc(n * sizeof(i8)); if (!out) { sqlite3_result_error_nomem(context); return; } for (size_t i = 0; i < n; i++) { - out[i] = ((int8_t *)vector)[start + i]; + out[i] = ((i8 *)vector)[start + i]; } - sqlite3_result_blob(context, out, n * sizeof(int8_t), sqlite3_free); + sqlite3_result_blob(context, out, n * sizeof(i8), sqlite3_free); sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8); goto done; } @@ -1137,13 +1142,13 @@ static void vec_slice(sqlite3_context *context, int argc, goto done; } - uint8_t *out = sqlite3_malloc(n / CHAR_BIT); + u8 *out = sqlite3_malloc(n / CHAR_BIT); if (!out) { sqlite3_result_error_nomem(context); return; } for (size_t i = 0; i < n / CHAR_BIT; i++) { - out[i] = ((uint8_t *)vector)[(start / CHAR_BIT) + i]; + out[i] = ((u8 *)vector)[(start / CHAR_BIT) + i]; } sqlite3_result_blob(context, out, n / CHAR_BIT, sqlite3_free); sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT); @@ -1180,9 +1185,9 @@ static void vec_to_json(sqlite3_context *context, int argc, if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) { sqlite3_str_appendf(str, "%f", ((float *)vector)[i]); } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) { - sqlite3_str_appendf(str, "%d", ((int8_t *)vector)[i]); + sqlite3_str_appendf(str, "%d", ((i8 *)vector)[i]); } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) { - uint8_t b = (((uint8_t *)vector)[i / 8] >> (i % CHAR_BIT)) & 1; + u8 b = (((u8 *)vector)[i / 8] >> (i % CHAR_BIT)) & 1; sqlite3_str_appendf(str, "%d", b); } } @@ -1479,7 +1484,7 @@ size_t vector_column_byte_size(struct VectorColumnDefinition column) { case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: return column.dimensions * sizeof(float); case SQLITE_VEC_ELEMENT_TYPE_INT8: - return column.dimensions * sizeof(int8_t); + return column.dimensions * sizeof(i8); case SQLITE_VEC_ELEMENT_TYPE_BIT: return column.dimensions / CHAR_BIT; } @@ -1601,7 +1606,7 @@ struct vec_each_vtab { typedef struct vec_each_cursor vec_each_cursor; struct vec_each_cursor { sqlite3_vtab_cursor base; - sqlite3_int64 iRowid; + i64 iRowid; enum VectorElementType vector_type; void *vector; size_t dimensions; @@ -1708,7 +1713,7 @@ static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { static int vec_eachEof(sqlite3_vtab_cursor *cur) { vec_each_cursor *pCur = (vec_each_cursor *)cur; - return pCur->iRowid >= (sqlite3_int64)pCur->dimensions; + return pCur->iRowid >= (i64)pCur->dimensions; } static int vec_eachNext(sqlite3_vtab_cursor *cur) { @@ -1728,13 +1733,13 @@ static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context, break; } case SQLITE_VEC_ELEMENT_TYPE_BIT: { - uint8_t x = ((uint8_t *)pCur->vector)[pCur->iRowid / CHAR_BIT]; + u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT]; sqlite3_result_int(context, (x & (0b10000000 >> ((pCur->iRowid % CHAR_BIT)))) > 0); break; } case SQLITE_VEC_ELEMENT_TYPE_INT8: { - sqlite3_result_int(context, ((int8_t *)pCur->vector)[pCur->iRowid]); + sqlite3_result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]); break; } } @@ -1986,8 +1991,8 @@ int parse_npy(const unsigned char *buffer, size_t bufferLength, void **data, for (size_t i = 0; i < sizeof(NPY_MAGIC); i++) { todo_assert(NPY_MAGIC[i] == buffer[i]); } - uint8_t major = buffer[6]; - uint8_t minor = buffer[7]; + u8 major = buffer[6]; + u8 minor = buffer[7]; uint16_t headerLength = 0; memcpy(&headerLength, &buffer[8], sizeof(uint16_t)); @@ -2030,7 +2035,7 @@ typedef enum { typedef struct vec_npy_each_cursor vec_npy_each_cursor; struct vec_npy_each_cursor { sqlite3_vtab_cursor base; - sqlite3_int64 iRowid; + i64 iRowid; // sqlite-vec compatible type of vector enum VectorElementType elementType; // number of vectors in the npy array @@ -2199,8 +2204,8 @@ static int vec_npy_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, for (size_t i = 0; i < countof(NPY_MAGIC); i++) { todo_assert(NPY_MAGIC[i] == header[i]); } - uint8_t major = header[6]; - uint8_t minor = header[7]; + u8 major = header[6]; + u8 minor = header[7]; uint16_t headerLength = 0; memcpy(&headerLength, &header[8], sizeof(uint16_t)); @@ -2516,8 +2521,8 @@ struct vec0_vtab { * Parameters: * 1: rowid of the row/vector to lookup * Result columns: - * 0: chunk_id (sqlite3_int64) - * 1: chunk_offset (sqlite3_int64) + * 0: chunk_id (i64) + * 1: chunk_offset (i64) * SQL: "SELECT chunk_id, chunk_offset FROM _rowids WHERE rowid = ?"" * * Must be cleaned up with sqlite3_finalize(). @@ -2579,7 +2584,7 @@ int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) { * Must be cleaned up with sqlite3_value_free(). * @returns SQLITE_OK on success, error code on failure */ -int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, sqlite3_int64 rowid, +int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid, sqlite3_value **out) { // TODO different stmt than stmtRowidsGetChunkPosition? // TODO return rc instead @@ -2597,8 +2602,7 @@ int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, sqlite3_int64 rowid, } // TODO make sure callees use the return value of this function -int vec0_result_id(vec0_vtab *p, sqlite3_context *context, - sqlite3_int64 rowid) { +int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) { if (!p->pkIsText) { sqlite3_result_int64(context, rowid); return SQLITE_OK; @@ -2629,9 +2633,8 @@ int vec0_result_id(vec0_vtab *p, sqlite3_context *context, * will be stored. * @return int SQLITE_OK on success. */ -int vec0_get_vector_data(vec0_vtab *pVtab, sqlite3_int64 rowid, - int vector_column_idx, void **outVector, - int *outVectorSize) { +int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx, + void **outVector, int *outVectorSize) { todo_assert((vector_column_idx >= 0) && (vector_column_idx < pVtab->numVectorColumns)); @@ -2640,10 +2643,8 @@ int vec0_get_vector_data(vec0_vtab *pVtab, sqlite3_int64 rowid, sqlite3_bind_int64(pVtab->stmtRowidsGetChunkPosition, 1, rowid); int rc = sqlite3_step(pVtab->stmtRowidsGetChunkPosition); todo_assert(rc == SQLITE_ROW); - sqlite3_int64 chunk_id = - sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 1); - sqlite3_int64 chunk_offset = - sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 2); + i64 chunk_id = sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 1); + i64 chunk_offset = sqlite3_column_int64(pVtab->stmtRowidsGetChunkPosition, 2); rc = sqlite3_blob_reopen(pVtab->vectorBlobs[vector_column_idx], chunk_id); todo_assert(rc == SQLITE_OK); @@ -2673,9 +2674,8 @@ int vec0_get_vector_data(vec0_vtab *pVtab, sqlite3_int64 rowid, * @param chunk_offset: Output chunk_offset of the row * @return int: SQLITE_OK on success, error code on failure */ -int vec0_get_chunk_position(vec0_vtab *p, sqlite3_int64 rowid, - sqlite3_int64 *chunk_id, - sqlite3_int64 *chunk_offset) { +int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, i64 *chunk_id, + i64 *chunk_offset) { int rc; sqlite3_reset(p->stmtRowidsGetChunkPosition); sqlite3_clear_bindings(p->stmtRowidsGetChunkPosition); @@ -2701,11 +2701,11 @@ int vec0_get_chunk_position(vec0_vtab *p, sqlite3_int64 rowid, * new chunk rowid. * @return int SQLITE_OK on success, error code otherwise. */ -int vec0_new_chunk(vec0_vtab *p, sqlite3_int64 *chunk_rowid) { +int vec0_new_chunk(vec0_vtab *p, i64 *chunk_rowid) { int rc; char *zSql; sqlite3_stmt *stmt; - sqlite3_int64 rowid; + i64 rowid; // Step 1: Insert a new row in _chunks, capture that new rowid zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME @@ -2725,7 +2725,7 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_int64 *chunk_rowid) { p->chunk_size / CHAR_BIT); // validity bitmap todo_assert(rc == SQLITE_OK); rc = sqlite3_bind_zeroblob(stmt, 3, - p->chunk_size * sizeof(sqlite3_int64)); // rowids + p->chunk_size * sizeof(i64)); // rowids todo_assert(rc == SQLITE_OK); rc = sqlite3_step(stmt); todo_assert(rc == SQLITE_DONE); @@ -2740,7 +2740,7 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_int64 *chunk_rowid) { for (int i = 0; i < p->numVectorColumns; i++) { - sqlite3_int64 vectorsSize = 0; + i64 vectorsSize = 0; switch (p->vector_columns[i].element_type) { case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: vectorsSize = @@ -2748,7 +2748,7 @@ int vec0_new_chunk(vec0_vtab *p, sqlite3_int64 *chunk_rowid) { break; case SQLITE_VEC_ELEMENT_TYPE_INT8: vectorsSize = - p->chunk_size * p->vector_columns[i].dimensions * sizeof(int8_t); + p->chunk_size * p->vector_columns[i].dimensions * sizeof(i8); break; case SQLITE_VEC_ELEMENT_TYPE_BIT: vectorsSize = @@ -2797,7 +2797,7 @@ typedef enum { struct vec0_query_fullscan_data { sqlite3_stmt *rowids_stmt; - int8_t done; + i8 done; }; int vec0_query_fullscan_data_clear( struct vec0_query_fullscan_data *fullscan_data) { @@ -2811,12 +2811,12 @@ int vec0_query_fullscan_data_clear( } struct vec0_query_knn_data { - sqlite3_int64 k; + i64 k; // Array of rowids of size k. Must be freed with sqlite3_freee(). - sqlite3_int64 *rowids; + i64 *rowids; // Array of distances of size k. Must be freed with sqlite3_freee(). float *distances; - sqlite3_int64 current_idx; + i64 current_idx; }; int vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) { if (knn_data->rowids) { @@ -2831,7 +2831,7 @@ int vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) { } struct vec0_query_point_data { - sqlite3_int64 rowid; + i64 rowid; void *vectors[VEC0_MAX_VECTOR_COLUMNS]; int done; }; @@ -3219,7 +3219,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { #endif for (int i = 0; i < pIdxInfo->nConstraint; i++) { - uint8_t vtabIn = 0; + u8 vtabIn = 0; // sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22) // ref: https://www.sqlite.org/changes.html#version_3_38_0 if (sqlite3_libversion_number() >= 3038000) { @@ -3341,12 +3341,11 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { // forward delcaration bc vec0Filter uses it static int vec0Next(sqlite3_vtab_cursor *cur); -void dethrone(int k, float *base_distances, sqlite3_int64 *base_rowids, - size_t chunk_size, int32_t *chunk_top_idx, float *chunk_distances, - sqlite3_int64 *chunk_rowids, +void dethrone(int k, float *base_distances, i64 *base_rowids, size_t chunk_size, + i32 *chunk_top_idx, float *chunk_distances, i64 *chunk_rowids, - sqlite3_int64 **out_rowids, float **out_distances) { - *out_rowids = sqlite3_malloc(k * sizeof(sqlite3_int64)); + i64 **out_rowids, float **out_distances) { + *out_rowids = sqlite3_malloc(k * sizeof(i64)); todo_assert(out_rowids); *out_distances = sqlite3_malloc(k * sizeof(float)); todo_assert(out_distances); @@ -3382,7 +3381,7 @@ void dethrone(int k, float *base_distances, sqlite3_int64 *base_rowids, * @param k: Size of output array * @return int */ -int min_idx(const float *distances, int32_t n, int32_t *out, int32_t k) { +int min_idx(const float *distances, i32 n, i32 *out, i32 k) { todo_assert(k > 0); todo_assert(k <= n); @@ -3432,13 +3431,13 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, size_t dimensions; enum VectorElementType elementType; vector_cleanup cleanup; - char * err; - rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType, &cleanup, &err); + char *err; + rc = vector_from_value(argv[0], &queryVector, &dimensions, &elementType, + &cleanup, &err); todo_assert(elementType == vector_column->element_type); todo_assert(dimensions == vector_column->dimensions); - - sqlite3_int64 k = sqlite3_value_int64(argv[1]); + i64 k = sqlite3_value_int64(argv[1]); todo_assert(k >= 0); if (k == 0) { knn_data->k = 0; @@ -3455,11 +3454,11 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, int rc; arrayRowidsIn = sqlite3_malloc(sizeof(struct Array)); todo_assert(arrayRowidsIn); - rc = array_init(arrayRowidsIn, sizeof(sqlite3_int64), 32); + rc = array_init(arrayRowidsIn, sizeof(i64), 32); todo_assert(rc == SQLITE_OK); for (rc = sqlite3_vtab_in_first(argv[2], &item); rc == SQLITE_OK && item; rc = sqlite3_vtab_in_next(argv[2], &item)) { - sqlite3_int64 rowid = sqlite3_value_int64(item); + i64 rowid = sqlite3_value_int64(item); rc = array_append(arrayRowidsIn, &rowid); todo_assert(rc == SQLITE_OK); } @@ -3468,7 +3467,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, _cmp); } - sqlite3_int64 *topk_rowids = sqlite3_malloc(k * sizeof(sqlite3_int64)); + i64 *topk_rowids = sqlite3_malloc(k * sizeof(i64)); todo_assert(topk_rowids); for (int i = 0; i < k; i++) { // TODO do we need to ensure that rowid is never -1? @@ -3497,7 +3496,7 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, todo_assert(rc == SQLITE_OK); void *baseVectors = NULL; - sqlite3_int64 baseVectorsSize = 0; + i64 baseVectorsSize = 0; while (true) { rc = sqlite3_step(stmtChunks); @@ -3506,22 +3505,21 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, if (rc != SQLITE_ROW) { todo("chunks iter error"); } - sqlite3_int64 chunk_id = sqlite3_column_int64(stmtChunks, 0); + i64 chunk_id = sqlite3_column_int64(stmtChunks, 0); unsigned char *chunkValidity = (unsigned char *)sqlite3_column_blob(stmtChunks, 1); - sqlite3_int64 validitySize = sqlite3_column_bytes(stmtChunks, 1); + i64 validitySize = sqlite3_column_bytes(stmtChunks, 1); todo_assert(validitySize == p->chunk_size / CHAR_BIT); - sqlite3_int64 *chunkRowids = - (sqlite3_int64 *)sqlite3_column_blob(stmtChunks, 2); - sqlite3_int64 rowidsSize = sqlite3_column_bytes(stmtChunks, 2); - todo_assert(rowidsSize == p->chunk_size * sizeof(sqlite3_int64)); + i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2); + i64 rowidsSize = sqlite3_column_bytes(stmtChunks, 2); + todo_assert(rowidsSize == p->chunk_size * sizeof(i64)); // open the vector chunk blob for the current chunk rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[vectorColumnIdx], "vectors", chunk_id, 0, &blobVectors); todo_assert(rc == SQLITE_OK); - sqlite3_int64 currentBaseVectorsSize = sqlite3_blob_bytes(blobVectors); + i64 currentBaseVectorsSize = sqlite3_blob_bytes(blobVectors); todo_assert((unsigned long)currentBaseVectorsSize == p->chunk_size * vector_column_byte_size(*vector_column)); @@ -3552,9 +3550,9 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, // If pre-filtering, make sure the rowid appears in the `rowid in (...)` // list. if (arrayRowidsIn) { - sqlite3_int64 rowid = chunkRowids[i]; + i64 rowid = chunkRowids[i]; void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length, - sizeof(sqlite3_int64), _cmp); + sizeof(i64), _cmp); if (!in) { chunk_distances[i] = __FLT_MAX__; continue; @@ -3584,17 +3582,17 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, break; } case SQLITE_VEC_ELEMENT_TYPE_INT8: { - const int8_t *base_i = - ((int8_t *)baseVectors) + (i * vector_column->dimensions); + const i8 *base_i = + ((i8 *)baseVectors) + (i * vector_column->dimensions); switch (vector_column->distance_metric) { case VEC0_DISTANCE_METRIC_L2: { - result = distance_l2_sqr_int8(base_i, (int8_t *)queryVector, + result = distance_l2_sqr_int8(base_i, (i8 *)queryVector, &vector_column->dimensions); break; } case VEC0_DISTANCE_METRIC_COSINE: { - result = distance_cosine_int8(base_i, (int8_t *)queryVector, + result = distance_cosine_int8(base_i, (i8 *)queryVector, &vector_column->dimensions); break; } @@ -3603,9 +3601,9 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, break; } case SQLITE_VEC_ELEMENT_TYPE_BIT: { - const uint8_t *base_i = ((uint8_t *)baseVectors) + - (i * (vector_column->dimensions / CHAR_BIT)); - result = distance_hamming(base_i, (uint8_t *)queryVector, + const u8 *base_i = ((u8 *)baseVectors) + + (i * (vector_column->dimensions / CHAR_BIT)); + result = distance_hamming(base_i, (u8 *)queryVector, &vector_column->dimensions); break; } @@ -3615,12 +3613,12 @@ int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum, } // now that we have the distances - int32_t *chunk_topk_idxs = sqlite3_malloc(k * sizeof(int32_t)); + i32 *chunk_topk_idxs = sqlite3_malloc(k * sizeof(i32)); todo_assert(chunk_topk_idxs); min_idx(chunk_distances, p->chunk_size, chunk_topk_idxs, k <= p->chunk_size ? k : p->chunk_size); - sqlite3_int64 *out_rowids; + i64 *out_rowids; float *out_distances; dethrone(k, topk_distances, topk_rowids, p->chunk_size, chunk_topk_idxs, chunk_distances, chunkRowids, @@ -3699,7 +3697,7 @@ int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int idxNum, UNUSED_PARAMETER(idxStr); int rc; todo_assert(argc == 1); - sqlite3_int64 rowid = sqlite3_value_int64(argv[0]); + i64 rowid = sqlite3_value_int64(argv[0]); pCur->query_plan = SQLITE_VEC0_QUERYPLAN_POINT; struct vec0_query_point_data *point_data = @@ -3800,8 +3798,7 @@ static int vec0Eof(sqlite3_vtab_cursor *cur) { static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur, sqlite3_context *context, int i) { todo_assert(pCur->fullscan_data); - sqlite3_int64 rowid = - sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0); + i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0); if (i == VEC0_COLUMN_ID) { vec0_result_id(pVtab, context, rowid); } else if (vec0_column_idx_is_vector(pVtab, i)) { @@ -3853,7 +3850,7 @@ static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur, sqlite3_context *context, int i) { todo_assert(pCur->knn_data); if (i == VEC0_COLUMN_ID) { - sqlite3_int64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; + i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx]; vec0_result_id(pVtab, context, rowid); return SQLITE_OK; } @@ -3902,16 +3899,16 @@ static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context, * * @param p: virtual table * @param idValue: Value containing the inserted rowid/id value. - * @param rowid: Output rowid, will point to the "real" sqlite3_int64 rowid + * @param rowid: Output rowid, will point to the "real" i64 rowid * value that was inserted * @return int SQLITE_OK on success, error code on failure */ int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue, - sqlite3_int64 *rowid) { + i64 *rowid) { /** * An insert into a vec0 table can happen a few different ways: - * 1) With default INTEGER primary key: With a supplied sqlite3_int64 rowid + * 1) With default INTEGER primary key: With a supplied i64 rowid * 2) With default INTEGER primary key: WITHOUT a supplied rowid * 3) With TEXT primary key: supplied text rowid */ @@ -3937,9 +3934,9 @@ int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue, #endif } - // Option 1: User supplied a sqlite3_int64 rowid + // Option 1: User supplied a i64 rowid else if (sqlite3_value_type(idValue) == SQLITE_INTEGER) { - sqlite3_int64 suppliedRowid = sqlite3_value_int64(idValue); + i64 suppliedRowid = sqlite3_value_int64(idValue); sqlite3_reset(p->stmtRowidsInsertRowid); sqlite3_clear_bindings(p->stmtRowidsInsertRowid); @@ -3987,12 +3984,12 @@ int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue, * @return int SQLITE_OK on success, error code on failure */ int vec0Update_InsertNextAvailableStep( - vec0_vtab *p, sqlite3_int64 *chunk_rowid, sqlite3_int64 *chunk_offset, + vec0_vtab *p, i64 *chunk_rowid, i64 *chunk_offset, sqlite3_blob **blobChunksValidity, const unsigned char **bufferChunksValidity) { int rc; - sqlite3_int64 validitySize; + i64 validitySize; *chunk_offset = -1; sqlite3_reset(p->stmtLatestChunk); @@ -4054,7 +4051,7 @@ done: } static int vec0Update_InsertWriteFinalStepVectors( - sqlite3_blob *blobVectors, const void *bVector, sqlite3_int64 chunk_offset, + sqlite3_blob *blobVectors, const void *bVector, i64 chunk_offset, size_t dimensions, enum VectorElementType element_type) { int n; int offset; @@ -4065,8 +4062,8 @@ static int vec0Update_InsertWriteFinalStepVectors( offset = chunk_offset * dimensions * sizeof(float); break; case SQLITE_VEC_ELEMENT_TYPE_INT8: - n = dimensions * sizeof(int8_t); - offset = chunk_offset * dimensions * sizeof(int8_t); + n = dimensions * sizeof(i8); + offset = chunk_offset * dimensions * sizeof(i8); break; case SQLITE_VEC_ELEMENT_TYPE_BIT: n = dimensions / CHAR_BIT; @@ -4092,9 +4089,9 @@ static int vec0Update_InsertWriteFinalStepVectors( * assigned chunk. * @return int SQLITE_OK on success, error code on failure */ -int vec0Update_InsertWriteFinalStep(vec0_vtab *p, sqlite3_int64 chunk_rowid, - sqlite3_int64 chunk_offset, - sqlite3_int64 rowid, void *vectorDatas[], +int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid, + i64 chunk_offset, i64 rowid, + void *vectorDatas[], sqlite3_blob *blobChunksValidity, const unsigned char *bufferChunksValidity) { int rc; @@ -4124,8 +4121,7 @@ int vec0Update_InsertWriteFinalStep(vec0_vtab *p, sqlite3_int64 chunk_rowid, break; case SQLITE_VEC_ELEMENT_TYPE_INT8: todo_assert((unsigned long)sqlite3_blob_bytes(blobVectors) == - p->chunk_size * p->vector_columns[i].dimensions * - sizeof(int8_t)); + p->chunk_size * p->vector_columns[i].dimensions * sizeof(i8)); break; case SQLITE_VEC_ELEMENT_TYPE_BIT: todo_assert((unsigned long)sqlite3_blob_bytes(blobVectors) == @@ -4145,9 +4141,9 @@ int vec0Update_InsertWriteFinalStep(vec0_vtab *p, sqlite3_int64 chunk_rowid, chunk_rowid, 1, &blobChunksRowids); todo_assert(rc == SQLITE_OK); todo_assert(sqlite3_blob_bytes(blobChunksRowids) == - p->chunk_size * sizeof(sqlite3_int64)); - rc = sqlite3_blob_write(blobChunksRowids, &rowid, sizeof(sqlite3_int64), - chunk_offset * sizeof(sqlite3_int64)); + p->chunk_size * sizeof(i64)); + rc = sqlite3_blob_write(blobChunksRowids, &rowid, sizeof(i64), + chunk_offset * sizeof(i64)); todo_assert(rc == SQLITE_OK); sqlite3_blob_close(blobChunksRowids); @@ -4176,16 +4172,16 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, int rc; // Rowid for the inserted row, deterimined by the inserted ID + _rowids shadow // table - sqlite3_int64 rowid; + i64 rowid; // Array to hold the vector data of the inserted row. Individual elements will // have a lifetime bound to the argv[..] values. void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS]; // Rowid of the chunk in the _chunks shadow table that the row will be a part // of. - sqlite3_int64 chunk_rowid; + i64 chunk_rowid; // offset within the chunk where the rowid belongs - sqlite3_int64 chunk_offset; + i64 chunk_offset; // a write-able blob of the validity column for the given chunk. Used to mark // validity bit @@ -4262,8 +4258,8 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite_int64 rowid) { vec0_vtab *p = (vec0_vtab *)pVTab; int rc; - sqlite3_int64 chunk_id; - sqlite3_int64 chunk_offset; + i64 chunk_id; + i64 chunk_offset; sqlite3_blob *blobChunksValidity = NULL; // 1. get chunk_id and chunk_offset from _rowids @@ -4312,9 +4308,9 @@ int vec0Update_UpdateOnRowid(sqlite3_vtab *pVTab, int argc, UNUSED_PARAMETER(argc); vec0_vtab *p = (vec0_vtab *)pVTab; int rc; - sqlite3_int64 chunk_id; - sqlite3_int64 chunk_offset; - sqlite3_int64 rowid = sqlite3_value_int64(argv[0]); + i64 chunk_id; + i64 chunk_offset; + i64 rowid = sqlite3_value_int64(argv[0]); // 1. get chunk_id and chunk_offset from _rowids rc = vec0_get_chunk_position(p, rowid, &chunk_id, &chunk_offset); @@ -4332,7 +4328,7 @@ int vec0Update_UpdateOnRowid(sqlite3_vtab *pVTab, int argc, dimensions = sqlite3_value_bytes(valueVector) / sizeof(float); break; case SQLITE_VEC_ELEMENT_TYPE_INT8: - dimensions = sqlite3_value_bytes(valueVector) * sizeof(int8_t); + dimensions = sqlite3_value_bytes(valueVector) * sizeof(i8); break; case SQLITE_VEC_ELEMENT_TYPE_BIT: dimensions = sqlite3_value_bytes(valueVector) * CHAR_BIT; @@ -4477,7 +4473,7 @@ int sqlite3_mmap_warm(sqlite3 *db, const char *zDb) { sqlite3_file *pFd = 0; rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_FILE_POINTER, &pFd); if (rc == SQLITE_OK && pFd->pMethods && pFd->pMethods->iVersion >= 3) { - sqlite3_int64 iPg = 1; + i64 iPg = 1; sqlite3_io_methods const *p = pFd->pMethods; while (1) { unsigned char *pMap; diff --git a/tests/test-loadable.py b/tests/test-loadable.py index 68c3c7c..2acd75a 100644 --- a/tests/test-loadable.py +++ b/tests/test-loadable.py @@ -16,6 +16,7 @@ EXT_PATH = "./dist/vec0" SUPPORTS_SUBTYPE = sqlite3.version_info[1] > 38 + def bitmap_full(n: int) -> bytearray: assert (n % 8) == 0 return bytes([0xFF] * int(n / 8)) @@ -614,18 +615,17 @@ def test_smoke(): assert re.match( "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:knn:", - explain_query_plan( "select * from vec_xyz where a match X'' and k = 10 order by distance" - ) + ), ) assert re.match( "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:fullscan", - explain_query_plan("select * from vec_xyz") + explain_query_plan("select * from vec_xyz"), ) assert re.match( "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 3:point", - explain_query_plan("select * from vec_xyz where rowid = 4") + explain_query_plan("select * from vec_xyz where rowid = 4"), ) db.execute("insert into vec_xyz(rowid, a) select 1, X'000000000000803f'")