2024-04-20 17:02:19 -07:00
# include "sqlite-vec.h"
2024-08-10 23:33:28 -07:00
2024-04-20 13:38:58 -07:00
# include <assert.h>
# include <errno.h>
2024-06-13 16:32:57 -07:00
# include <float.h>
2024-04-20 17:02:19 -07:00
# include <inttypes.h>
2024-04-20 13:38:58 -07:00
# include <limits.h>
# include <math.h>
# include <stdbool.h>
2024-04-20 17:02:19 -07:00
# include <stdint.h>
2024-04-20 13:38:58 -07:00
# include <stdlib.h>
# include <string.h>
2026-03-29 19:44:44 -07:00
# ifdef SQLITE_VEC_DEBUG
2024-08-10 23:33:28 -07:00
# include <stdio.h>
# endif
2024-09-13 12:46:13 -07:00
# ifndef SQLITE_CORE
2024-04-20 13:38:58 -07:00
# include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
2024-09-13 12:46:13 -07:00
# else
2024-09-20 13:17:57 -07:00
# include "sqlite3.h"
2024-09-13 12:46:13 -07:00
# endif
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
// Compile-time switch named for the DiskANN index code. Defaults to 1 unless
// the build has already defined it (e.g. -DSQLITE_VEC_ENABLE_DISKANN=0).
# ifndef SQLITE_VEC_ENABLE_DISKANN
# define SQLITE_VEC_ENABLE_DISKANN 1
# endif
2024-04-20 17:02:19 -07:00
// Short aliases for the fixed-width integer, float and size types used
// throughout this file. Note that i64 aliases sqlite3_int64 rather than
// int64_t.
typedef int8_t i8 ;
typedef uint8_t u8 ;
2024-06-08 14:52:24 -04:00
typedef int16_t i16 ;
2024-04-20 17:02:19 -07:00
typedef int32_t i32 ;
typedef sqlite3_int64 i64 ;
typedef uint32_t u32 ;
typedef uint64_t u64 ;
typedef float f32 ;
typedef size_t usize ;
2024-04-20 13:38:58 -07:00
// Marks a parameter as intentionally unused by evaluating it as a void
// expression. Only defined here when nothing earlier has defined it.
# ifndef UNUSED_PARAMETER
# define UNUSED_PARAMETER(X) (void)(X)
# endif
2024-08-10 23:33:28 -07:00
// sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22)
// https://www.sqlite.org/changes.html#version_3_38_0
// COMPILER_SUPPORTS_VTAB_IN is defined only when the SQLite header being
// compiled against reports version 3.38.0 or newer; otherwise it stays
// undefined.
2024-08-05 16:46:35 -07:00
# if SQLITE_VERSION_NUMBER >= 3038000
# define COMPILER_SUPPORTS_VTAB_IN 1
# endif
2026-03-30 23:17:30 -07:00
// Compile-time switch named for the experimental IVF code. Defaults to 0
// unless the build has already defined it.
# ifndef SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
# define SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE 0
2026-03-29 19:46:23 -07:00
# endif
2024-08-05 16:46:35 -07:00
// Fallback definitions for SQLite constants. Each one is defined here only
// when the included SQLite header has not already provided it.
// NOTE(review): the values presumably mirror those in newer sqlite3.h
// releases -- confirm against the SQLite source before changing them.
# ifndef SQLITE_SUBTYPE
# define SQLITE_SUBTYPE 0x000100000
# endif
# ifndef SQLITE_RESULT_SUBTYPE
# define SQLITE_RESULT_SUBTYPE 0x001000000
# endif
# ifndef SQLITE_INDEX_CONSTRAINT_LIMIT
2024-08-10 23:33:28 -07:00
# define SQLITE_INDEX_CONSTRAINT_LIMIT 73
2024-08-05 16:46:35 -07:00
# endif
2024-11-20 00:02:04 -08:00
# ifndef SQLITE_INDEX_CONSTRAINT_OFFSET
# define SQLITE_INDEX_CONSTRAINT_OFFSET 74
# endif
2024-04-20 13:38:58 -07:00
// Number of elements in a true array. Do not use on a pointer: sizeof(x)
// would then be the pointer size, not the array size.
# define countof(x) (sizeof(x) / sizeof((x)[0]))
2024-07-05 12:07:45 -07:00
// Smaller of two values. The selected argument is evaluated twice, so avoid
// passing expressions with side effects.
# define min(a, b) (((a) <= (b)) ? (a) : (b))
2024-04-20 13:38:58 -07:00
2026-03-29 19:45:54 -07:00
// Compile-time switch named for the rescore feature. Defaults to 1 unless the
// build has already defined it.
# ifndef SQLITE_VEC_ENABLE_RESCORE
# define SQLITE_VEC_ENABLE_RESCORE 1
# endif
2024-04-20 13:38:58 -07:00
// Identifies the element type of a vector. The three members take the
// consecutive values 223, 224 and 225.
// NOTE(review): the non-zero base presumably lets these double as SQLite
// value subtypes (see vector_subtype_name(int subtype)) -- confirm.
enum VectorElementType {
2024-09-20 13:17:57 -07:00
// clang-format off
2024-04-20 13:38:58 -07:00
SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223 + 0 ,
2024-09-20 13:17:57 -07:00
SQLITE_VEC_ELEMENT_TYPE_BIT = 223 + 1 ,
SQLITE_VEC_ELEMENT_TYPE_INT8 = 223 + 2 ,
// clang-format on
2024-04-20 13:38:58 -07:00
} ;
# ifdef SQLITE_VEC_ENABLE_AVX
# include <immintrin.h>
# define PORTABLE_ALIGN32 __attribute__((aligned(32)))
# define PORTABLE_ALIGN64 __attribute__((aligned(64)))
2024-04-20 17:05:37 -07:00
static f32 l2_sqr_float_avx ( const void * pVect1v , const void * pVect2v ,
const void * qty_ptr ) {
f32 * pVect1 = ( f32 * ) pVect1v ;
f32 * pVect2 = ( f32 * ) pVect2v ;
2024-04-20 13:38:58 -07:00
size_t qty = * ( ( size_t * ) qty_ptr ) ;
2024-04-20 17:05:37 -07:00
f32 PORTABLE_ALIGN32 TmpRes [ 8 ] ;
2024-04-20 13:38:58 -07:00
size_t qty16 = qty > > 4 ;
2024-04-20 17:05:37 -07:00
const f32 * pEnd1 = pVect1 + ( qty16 < < 4 ) ;
2024-04-20 13:38:58 -07:00
__m256 diff , v1 , v2 ;
__m256 sum = _mm256_set1_ps ( 0 ) ;
while ( pVect1 < pEnd1 ) {
v1 = _mm256_loadu_ps ( pVect1 ) ;
pVect1 + = 8 ;
v2 = _mm256_loadu_ps ( pVect2 ) ;
pVect2 + = 8 ;
diff = _mm256_sub_ps ( v1 , v2 ) ;
sum = _mm256_add_ps ( sum , _mm256_mul_ps ( diff , diff ) ) ;
v1 = _mm256_loadu_ps ( pVect1 ) ;
pVect1 + = 8 ;
v2 = _mm256_loadu_ps ( pVect2 ) ;
pVect2 + = 8 ;
diff = _mm256_sub_ps ( v1 , v2 ) ;
sum = _mm256_add_ps ( sum , _mm256_mul_ps ( diff , diff ) ) ;
}
_mm256_store_ps ( TmpRes , sum ) ;
return sqrt ( TmpRes [ 0 ] + TmpRes [ 1 ] + TmpRes [ 2 ] + TmpRes [ 3 ] + TmpRes [ 4 ] +
TmpRes [ 5 ] + TmpRes [ 6 ] + TmpRes [ 7 ] ) ;
}
# endif
# ifdef SQLITE_VEC_ENABLE_NEON
# include <arm_neon.h>
# define PORTABLE_ALIGN32 __attribute__((aligned(32)))
// thx https://github.com/nmslib/hnswlib/pull/299/files
2024-04-20 17:05:37 -07:00
/**
 * @brief NEON kernel for the Euclidean (L2) distance between two float32
 * vectors. The square root is applied before returning.
 *
 * Blocks of 16 floats are processed with four independent 4-lane
 * accumulators; any remaining elements are handled one at a time.
 *
 * @param pVect1v first vector, *qty_ptr f32 values
 * @param pVect2v second vector, same length as the first
 * @param qty_ptr pointer to a size_t holding the number of elements
 * @return sqrt(sum_i (a[i] - b[i])^2)
 */
static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
                             const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);
  const size_t n_blocks = count >> 4;

  float32x4_t acc0 = vdupq_n_f32(0);
  float32x4_t acc1 = vdupq_n_f32(0);
  float32x4_t acc2 = vdupq_n_f32(0);
  float32x4_t acc3 = vdupq_n_f32(0);

  for (size_t blk = 0; blk < n_blocks; blk++, lhs += 16, rhs += 16) {
    float32x4_t d0 = vsubq_f32(vld1q_f32(lhs), vld1q_f32(rhs));
    acc0 = vfmaq_f32(acc0, d0, d0);

    float32x4_t d1 = vsubq_f32(vld1q_f32(lhs + 4), vld1q_f32(rhs + 4));
    acc1 = vfmaq_f32(acc1, d1, d1);

    float32x4_t d2 = vsubq_f32(vld1q_f32(lhs + 8), vld1q_f32(rhs + 8));
    acc2 = vfmaq_f32(acc2, d2, d2);

    float32x4_t d3 = vsubq_f32(vld1q_f32(lhs + 12), vld1q_f32(rhs + 12));
    acc3 = vfmaq_f32(acc3, d3, d3);
  }

  // Combine the four accumulators pairwise, then reduce across lanes.
  f32 total =
      vaddvq_f32(vaddq_f32(vaddq_f32(acc0, acc1), vaddq_f32(acc2, acc3)));

  // Leftover elements (count % 16).
  for (size_t i = n_blocks << 4; i < count; i++, lhs++, rhs++) {
    f32 delta = *lhs - *rhs;
    total += delta * delta;
  }

  return sqrt(total);
}
2024-06-08 14:52:24 -04:00
2026-03-29 19:44:44 -07:00
static f32 cosine_float_neon ( const void * pVect1v , const void * pVect2v ,
const void * qty_ptr ) {
f32 * pVect1 = ( f32 * ) pVect1v ;
f32 * pVect2 = ( f32 * ) pVect2v ;
size_t qty = * ( ( size_t * ) qty_ptr ) ;
size_t qty16 = qty > > 4 ;
const f32 * pEnd1 = pVect1 + ( qty16 < < 4 ) ;
float32x4_t dot0 = vdupq_n_f32 ( 0 ) , dot1 = vdupq_n_f32 ( 0 ) ;
float32x4_t dot2 = vdupq_n_f32 ( 0 ) , dot3 = vdupq_n_f32 ( 0 ) ;
float32x4_t amag0 = vdupq_n_f32 ( 0 ) , amag1 = vdupq_n_f32 ( 0 ) ;
float32x4_t amag2 = vdupq_n_f32 ( 0 ) , amag3 = vdupq_n_f32 ( 0 ) ;
float32x4_t bmag0 = vdupq_n_f32 ( 0 ) , bmag1 = vdupq_n_f32 ( 0 ) ;
float32x4_t bmag2 = vdupq_n_f32 ( 0 ) , bmag3 = vdupq_n_f32 ( 0 ) ;
while ( pVect1 < pEnd1 ) {
float32x4_t v1 , v2 ;
v1 = vld1q_f32 ( pVect1 ) ; pVect1 + = 4 ;
v2 = vld1q_f32 ( pVect2 ) ; pVect2 + = 4 ;
dot0 = vfmaq_f32 ( dot0 , v1 , v2 ) ;
amag0 = vfmaq_f32 ( amag0 , v1 , v1 ) ;
bmag0 = vfmaq_f32 ( bmag0 , v2 , v2 ) ;
v1 = vld1q_f32 ( pVect1 ) ; pVect1 + = 4 ;
v2 = vld1q_f32 ( pVect2 ) ; pVect2 + = 4 ;
dot1 = vfmaq_f32 ( dot1 , v1 , v2 ) ;
amag1 = vfmaq_f32 ( amag1 , v1 , v1 ) ;
bmag1 = vfmaq_f32 ( bmag1 , v2 , v2 ) ;
v1 = vld1q_f32 ( pVect1 ) ; pVect1 + = 4 ;
v2 = vld1q_f32 ( pVect2 ) ; pVect2 + = 4 ;
dot2 = vfmaq_f32 ( dot2 , v1 , v2 ) ;
amag2 = vfmaq_f32 ( amag2 , v1 , v1 ) ;
bmag2 = vfmaq_f32 ( bmag2 , v2 , v2 ) ;
v1 = vld1q_f32 ( pVect1 ) ; pVect1 + = 4 ;
v2 = vld1q_f32 ( pVect2 ) ; pVect2 + = 4 ;
dot3 = vfmaq_f32 ( dot3 , v1 , v2 ) ;
amag3 = vfmaq_f32 ( amag3 , v1 , v1 ) ;
bmag3 = vfmaq_f32 ( bmag3 , v2 , v2 ) ;
}
f32 dot_s = vaddvq_f32 ( vaddq_f32 ( vaddq_f32 ( dot0 , dot1 ) , vaddq_f32 ( dot2 , dot3 ) ) ) ;
f32 amag_s = vaddvq_f32 ( vaddq_f32 ( vaddq_f32 ( amag0 , amag1 ) , vaddq_f32 ( amag2 , amag3 ) ) ) ;
f32 bmag_s = vaddvq_f32 ( vaddq_f32 ( vaddq_f32 ( bmag0 , bmag1 ) , vaddq_f32 ( bmag2 , bmag3 ) ) ) ;
const f32 * pEnd2 = pVect1 + ( qty - ( qty16 < < 4 ) ) ;
while ( pVect1 < pEnd2 ) {
dot_s + = * pVect1 * * pVect2 ;
amag_s + = * pVect1 * * pVect1 ;
bmag_s + = * pVect2 * * pVect2 ;
pVect1 + + ; pVect2 + + ;
}
return 1.0f - ( dot_s / ( sqrtf ( amag_s ) * sqrtf ( bmag_s ) ) ) ;
}
2024-06-08 14:52:24 -04:00
static f32 l2_sqr_int8_neon ( const void * pVect1v , const void * pVect2v ,
const void * qty_ptr ) {
i8 * pVect1 = ( i8 * ) pVect1v ;
i8 * pVect2 = ( i8 * ) pVect2v ;
size_t qty = * ( ( size_t * ) qty_ptr ) ;
const i8 * pEnd1 = pVect1 + qty ;
i32 sum_scalar = 0 ;
while ( pVect1 < pEnd1 - 7 ) {
// loading 8 at a time
int8x8_t v1 = vld1_s8 ( pVect1 ) ;
int8x8_t v2 = vld1_s8 ( pVect2 ) ;
pVect1 + = 8 ;
pVect2 + = 8 ;
2026-03-31 14:55:37 -07:00
// widen i8 to i16 for subtraction
2024-06-08 14:52:24 -04:00
int16x8_t v1_wide = vmovl_s8 ( v1 ) ;
int16x8_t v2_wide = vmovl_s8 ( v2 ) ;
int16x8_t diff = vsubq_s16 ( v1_wide , v2_wide ) ;
2026-03-31 14:55:37 -07:00
// widening multiply: i16*i16 -> i32 to avoid i16 overflow
// (diff can be up to 255, so diff*diff can be up to 65025 > INT16_MAX)
int32x4_t sq_lo = vmull_s16 ( vget_low_s16 ( diff ) , vget_low_s16 ( diff ) ) ;
int32x4_t sq_hi = vmull_s16 ( vget_high_s16 ( diff ) , vget_high_s16 ( diff ) ) ;
int32x4_t sum = vaddq_s32 ( sq_lo , sq_hi ) ;
2024-06-08 14:52:24 -04:00
sum_scalar + = vgetq_lane_s32 ( sum , 0 ) + vgetq_lane_s32 ( sum , 1 ) +
2024-06-13 16:32:57 -07:00
vgetq_lane_s32 ( sum , 2 ) + vgetq_lane_s32 ( sum , 3 ) ;
2024-06-08 14:52:24 -04:00
}
// handle leftovers
while ( pVect1 < pEnd1 ) {
i16 diff = ( i16 ) * pVect1 - ( i16 ) * pVect2 ;
sum_scalar + = diff * diff ;
pVect1 + + ;
pVect2 + + ;
}
return sqrtf ( sum_scalar ) ;
}
2024-07-23 12:04:15 -04:00
static i32 l1_int8_neon ( const void * pVect1v , const void * pVect2v ,
const void * qty_ptr ) {
i8 * pVect1 = ( i8 * ) pVect1v ;
i8 * pVect2 = ( i8 * ) pVect2v ;
size_t qty = * ( ( size_t * ) qty_ptr ) ;
const int8_t * pEnd1 = pVect1 + qty ;
int32x4_t acc1 = vdupq_n_s32 ( 0 ) ;
int32x4_t acc2 = vdupq_n_s32 ( 0 ) ;
int32x4_t acc3 = vdupq_n_s32 ( 0 ) ;
int32x4_t acc4 = vdupq_n_s32 ( 0 ) ;
while ( pVect1 < pEnd1 - 63 ) {
int8x16_t v1 = vld1q_s8 ( pVect1 ) ;
int8x16_t v2 = vld1q_s8 ( pVect2 ) ;
int8x16_t diff1 = vabdq_s8 ( v1 , v2 ) ;
acc1 = vaddq_s32 ( acc1 , vpaddlq_u16 ( vpaddlq_u8 ( diff1 ) ) ) ;
v1 = vld1q_s8 ( pVect1 + 16 ) ;
v2 = vld1q_s8 ( pVect2 + 16 ) ;
int8x16_t diff2 = vabdq_s8 ( v1 , v2 ) ;
acc2 = vaddq_s32 ( acc2 , vpaddlq_u16 ( vpaddlq_u8 ( diff2 ) ) ) ;
v1 = vld1q_s8 ( pVect1 + 32 ) ;
v2 = vld1q_s8 ( pVect2 + 32 ) ;
int8x16_t diff3 = vabdq_s8 ( v1 , v2 ) ;
acc3 = vaddq_s32 ( acc3 , vpaddlq_u16 ( vpaddlq_u8 ( diff3 ) ) ) ;
v1 = vld1q_s8 ( pVect1 + 48 ) ;
v2 = vld1q_s8 ( pVect2 + 48 ) ;
int8x16_t diff4 = vabdq_s8 ( v1 , v2 ) ;
acc4 = vaddq_s32 ( acc4 , vpaddlq_u16 ( vpaddlq_u8 ( diff4 ) ) ) ;
pVect1 + = 64 ;
pVect2 + = 64 ;
}
while ( pVect1 < pEnd1 - 15 ) {
int8x16_t v1 = vld1q_s8 ( pVect1 ) ;
int8x16_t v2 = vld1q_s8 ( pVect2 ) ;
int8x16_t diff = vabdq_s8 ( v1 , v2 ) ;
acc1 = vaddq_s32 ( acc1 , vpaddlq_u16 ( vpaddlq_u8 ( diff ) ) ) ;
pVect1 + = 16 ;
pVect2 + = 16 ;
}
int32x4_t acc = vaddq_s32 ( vaddq_s32 ( acc1 , acc2 ) , vaddq_s32 ( acc3 , acc4 ) ) ;
int32_t sum = 0 ;
while ( pVect1 < pEnd1 ) {
int32_t diff = abs ( ( int32_t ) * pVect1 - ( int32_t ) * pVect2 ) ;
sum + = diff ;
pVect1 + + ;
pVect2 + + ;
}
return vaddvq_s32 ( acc ) + sum ;
}
/**
 * @brief NEON kernel for the L1 (Manhattan) distance between two float32
 * vectors, accumulated in double precision.
 *
 * @param pVect1v first vector, *qty_ptr f32 values
 * @param pVect2v second vector, same length as the first
 * @param qty_ptr pointer to a size_t holding the number of elements
 * @return sum_i |a[i] - b[i]| as a double
 */
static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
                          const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);
  const f32 *const end = lhs + count;

  float64x2_t vec_total = vdupq_n_f64(0);

  // Four floats per iteration, widened to two f64x2 halves before the
  // absolute difference is taken.
  for (; lhs < end - 3; lhs += 4, rhs += 4) {
    float32x4_t x = vld1q_f32(lhs);
    float32x4_t y = vld1q_f32(rhs);

    float64x2_t lo = vabdq_f64(vcvt_f64_f32(vget_low_f32(x)),
                               vcvt_f64_f32(vget_low_f32(y)));
    float64x2_t hi = vabdq_f64(vcvt_high_f64_f32(x), vcvt_high_f64_f32(y));

    vec_total = vaddq_f64(vec_total, vaddq_f64(lo, hi));
  }

  // Leftover elements.
  double tail_total = 0;
  for (; lhs < end; lhs++, rhs++) {
    tail_total += fabs((double)*lhs - (double)*rhs);
  }

  return vaddvq_f64(vec_total) + tail_total;
}
2024-04-20 13:38:58 -07:00
# endif
2024-04-20 17:05:37 -07:00
/**
 * @brief Portable scalar Euclidean (L2) distance between two float32
 * vectors. The square root is applied before returning.
 *
 * @param pVect1v first vector, *qty_ptr f32 values
 * @param pVect2v second vector, same length as the first
 * @param qty_ptr pointer to a size_t holding the number of elements
 * @return sqrt(sum_i (a[i] - b[i])^2)
 */
static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v,
                        const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t count = *((const size_t *)qty_ptr);

  f32 sum_sq = 0;
  for (size_t i = 0; i < count; i++) {
    f32 delta = lhs[i] - rhs[i];
    sum_sq += delta * delta;
  }
  return sqrt(sum_sq);
}
2024-04-20 17:05:37 -07:00
/**
 * @brief Portable scalar Euclidean (L2) distance between two int8 vectors.
 * The square root is applied before returning.
 *
 * @param pA first vector, *pD i8 values
 * @param pB second vector, same length as the first
 * @param pD pointer to a size_t holding the number of elements
 * @return sqrt of the f32 sum of squared element differences
 */
static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t count = *((const size_t *)pD);

  f32 sum_sq = 0;
  for (size_t i = 0; i < count; i++) {
    // The subtraction happens in int (after promotion), then becomes f32.
    f32 delta = lhs[i] - rhs[i];
    sum_sq += delta * delta;
  }
  return sqrt(sum_sq);
}
2024-04-20 17:05:37 -07:00
/**
 * @brief L2 distance between two float32 vectors, choosing the fastest
 * kernel compiled in.
 *
 * With NEON enabled, vectors longer than 16 elements use the NEON kernel;
 * with AVX enabled, lengths that are a multiple of 16 use the AVX kernel;
 * everything else falls back to the scalar loop.
 *
 * @param a first vector
 * @param b second vector, same length as the first
 * @param d pointer to a size_t holding the number of elements
 * @return the Euclidean distance between a and b
 */
static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
#if defined(SQLITE_VEC_ENABLE_NEON) || defined(SQLITE_VEC_ENABLE_AVX)
  const size_t n_elements = *(const size_t *)d;
#endif

#if defined(SQLITE_VEC_ENABLE_NEON)
  if (n_elements > 16) {
    return l2_sqr_float_neon(a, b, d);
  }
#endif

#if defined(SQLITE_VEC_ENABLE_AVX)
  if ((n_elements % 16) == 0) {
    return l2_sqr_float_avx(a, b, d);
  }
#endif

  return l2_sqr_float(a, b, d);
}
2024-04-20 17:05:37 -07:00
/**
 * @brief L2 distance between two int8 vectors. Uses the NEON kernel when it
 * is compiled in and the vectors hold more than 7 elements, otherwise the
 * scalar loop.
 *
 * @param a first vector
 * @param b second vector, same length as the first
 * @param d pointer to a size_t holding the number of elements
 * @return the Euclidean distance between a and b
 */
static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) {
#if defined(SQLITE_VEC_ENABLE_NEON)
  const size_t n_elements = *(const size_t *)d;
  if (n_elements > 7) {
    return l2_sqr_int8_neon(a, b, d);
  }
#endif
  return l2_sqr_int8(a, b, d);
}
2024-07-23 12:04:15 -04:00
/**
 * @brief Portable scalar L1 (Manhattan) distance between two int8 vectors.
 *
 * @param pA first vector, *pD i8 values
 * @param pB second vector, same length as the first
 * @param pD pointer to a size_t holding the number of elements
 * @return sum_i |a[i] - b[i]|
 */
static i32 l1_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t count = *((const size_t *)pD);

  i32 total = 0;
  for (size_t i = 0; i < count; i++) {
    total += abs(lhs[i] - rhs[i]);
  }
  return total;
}
/**
 * @brief L1 distance between two int8 vectors. Uses the NEON kernel when it
 * is compiled in and the vectors hold more than 15 elements, otherwise the
 * scalar loop.
 *
 * @param a first vector
 * @param b second vector, same length as the first
 * @param d pointer to a size_t holding the number of elements
 * @return sum_i |a[i] - b[i]|
 */
static i32 distance_l1_int8(const void *a, const void *b, const void *d) {
#if defined(SQLITE_VEC_ENABLE_NEON)
  const size_t n_elements = *(const size_t *)d;
  if (n_elements > 15) {
    return l1_int8_neon(a, b, d);
  }
#endif
  return l1_int8(a, b, d);
}
/**
 * @brief Portable scalar L1 (Manhattan) distance between two float32
 * vectors, accumulated in double precision.
 *
 * @param pA first vector, *pD f32 values
 * @param pB second vector, same length as the first
 * @param pD pointer to a size_t holding the number of elements
 * @return sum_i |a[i] - b[i]| as a double
 */
static double l1_f32(const void *pA, const void *pB, const void *pD) {
  const f32 *lhs = (const f32 *)pA;
  const f32 *rhs = (const f32 *)pB;
  const size_t count = *((const size_t *)pD);

  double total = 0;
  for (size_t i = 0; i < count; i++) {
    total += fabs((double)lhs[i] - (double)rhs[i]);
  }
  return total;
}
/**
 * @brief L1 distance between two float32 vectors. Uses the NEON kernel when
 * it is compiled in and the vectors hold more than 3 elements, otherwise the
 * scalar loop.
 *
 * @param a first vector
 * @param b second vector, same length as the first
 * @param d pointer to a size_t holding the number of elements
 * @return sum_i |a[i] - b[i]| as a double
 */
static double distance_l1_f32(const void *a, const void *b, const void *d) {
#if defined(SQLITE_VEC_ENABLE_NEON)
  const size_t n_elements = *(const size_t *)d;
  if (n_elements > 3) {
    return l1_f32_neon(a, b, d);
  }
#endif
  return l1_f32(a, b, d);
}
2024-04-20 17:05:37 -07:00
/**
 * @brief Cosine distance between two float32 vectors.
 *
 * With NEON enabled, vectors longer than 16 elements are handed to
 * cosine_float_neon(); otherwise the dot product and squared magnitudes are
 * accumulated in f32 by a scalar loop. No special handling is done for
 * zero-magnitude inputs.
 *
 * @param pVect1v first vector, *qty_ptr f32 values
 * @param pVect2v second vector, same length as the first
 * @param qty_ptr pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v,
                                 const void *qty_ptr) {
  const size_t count = *((const size_t *)qty_ptr);

#if defined(SQLITE_VEC_ENABLE_NEON)
  if (count > 16) {
    return cosine_float_neon(pVect1v, pVect2v, qty_ptr);
  }
#endif

  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;

  f32 dot = 0;
  f32 lhs_sq = 0;
  f32 rhs_sq = 0;
  for (size_t i = 0; i < count; i++) {
    const f32 x = lhs[i];
    const f32 y = rhs[i];
    dot += x * y;
    lhs_sq += x * x;
    rhs_sq += y * y;
  }

  // sqrt() works in double, so the division and subtraction do too; the
  // result is narrowed to f32 on return.
  return 1 - (dot / (sqrt(lhs_sq) * sqrt(rhs_sq)));
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Portable scalar cosine distance between two int8 vectors.
 *
 * Element products are formed in int and accumulated in f32. No special
 * handling is done for zero-magnitude inputs.
 *
 * @param pA first vector, *pD i8 values
 * @param pB second vector, same length as the first
 * @param pD pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 cosine_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t count = *((const size_t *)pD);

  f32 dot = 0;
  f32 lhs_sq = 0;
  f32 rhs_sq = 0;
  for (size_t i = 0; i < count; i++) {
    const int x = lhs[i];
    const int y = rhs[i];
    dot += x * y;
    lhs_sq += x * x;
    rhs_sq += y * y;
  }

  return 1 - (dot / (sqrt(lhs_sq) * sqrt(rhs_sq)));
}
2026-03-29 19:44:44 -07:00
# ifdef SQLITE_VEC_ENABLE_NEON
/**
 * Accumulate, for 16 int8 lanes starting at a and b, the dot product a.b and
 * the squared magnitudes a.a and b.b into the given i32x4 accumulators.
 */
static void cosine_int8_neon_accum16(const i8 *a, const i8 *b,
                                     int32x4_t *dot, int32x4_t *aMag,
                                     int32x4_t *bMag) {
  int8x16_t va = vld1q_s8(a);
  int8x16_t vb = vld1q_s8(b);

  // Widen i8 -> i16 so the widening multiply-accumulate (i16*i16 -> i32)
  // can be used.
  int16x8_t a_lo = vmovl_s8(vget_low_s8(va));
  int16x8_t a_hi = vmovl_s8(vget_high_s8(va));
  int16x8_t b_lo = vmovl_s8(vget_low_s8(vb));
  int16x8_t b_hi = vmovl_s8(vget_high_s8(vb));

  *dot = vmlal_s16(*dot, vget_low_s16(a_lo), vget_low_s16(b_lo));
  *dot = vmlal_s16(*dot, vget_high_s16(a_lo), vget_high_s16(b_lo));
  *dot = vmlal_s16(*dot, vget_low_s16(a_hi), vget_low_s16(b_hi));
  *dot = vmlal_s16(*dot, vget_high_s16(a_hi), vget_high_s16(b_hi));

  *aMag = vmlal_s16(*aMag, vget_low_s16(a_lo), vget_low_s16(a_lo));
  *aMag = vmlal_s16(*aMag, vget_high_s16(a_lo), vget_high_s16(a_lo));
  *aMag = vmlal_s16(*aMag, vget_low_s16(a_hi), vget_low_s16(a_hi));
  *aMag = vmlal_s16(*aMag, vget_high_s16(a_hi), vget_high_s16(a_hi));

  *bMag = vmlal_s16(*bMag, vget_low_s16(b_lo), vget_low_s16(b_lo));
  *bMag = vmlal_s16(*bMag, vget_high_s16(b_lo), vget_high_s16(b_lo));
  *bMag = vmlal_s16(*bMag, vget_low_s16(b_hi), vget_low_s16(b_hi));
  *bMag = vmlal_s16(*bMag, vget_high_s16(b_hi), vget_high_s16(b_hi));
}

/**
 * @brief NEON kernel for the cosine distance between two int8 vectors.
 *
 * All sums are exact i32 integers, so how elements are spread over the
 * accumulators does not affect the result. The input is consumed 32 elements
 * at a time (two independent accumulator sets), then 16 at a time, then one
 * by one. Loop bounds are tracked as a remaining-element count, so no pointer
 * before the start of the input is ever formed, whatever the length.
 *
 * @param pA first vector, *pD i8 values
 * @param pB second vector, same length as the first
 * @param pD pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 cosine_int8_neon(const void *pA, const void *pB, const void *pD) {
  const i8 *a = (const i8 *)pA;
  const i8 *b = (const i8 *)pB;
  size_t remaining = *((const size_t *)pD);

  int32x4_t dot_acc1 = vdupq_n_s32(0);
  int32x4_t dot_acc2 = vdupq_n_s32(0);
  int32x4_t aMag_acc1 = vdupq_n_s32(0);
  int32x4_t aMag_acc2 = vdupq_n_s32(0);
  int32x4_t bMag_acc1 = vdupq_n_s32(0);
  int32x4_t bMag_acc2 = vdupq_n_s32(0);

  while (remaining >= 32) {
    cosine_int8_neon_accum16(a, b, &dot_acc1, &aMag_acc1, &bMag_acc1);
    cosine_int8_neon_accum16(a + 16, b + 16, &dot_acc2, &aMag_acc2,
                             &bMag_acc2);
    a += 32;
    b += 32;
    remaining -= 32;
  }

  while (remaining >= 16) {
    cosine_int8_neon_accum16(a, b, &dot_acc1, &aMag_acc1, &bMag_acc1);
    a += 16;
    b += 16;
    remaining -= 16;
  }

  i32 dot = vaddvq_s32(vaddq_s32(dot_acc1, dot_acc2));
  i32 aMag = vaddvq_s32(vaddq_s32(aMag_acc1, aMag_acc2));
  i32 bMag = vaddvq_s32(vaddq_s32(bMag_acc1, bMag_acc2));

  // Scalar tail for the last (count % 16) elements.
  while (remaining > 0) {
    dot += (i32)*a * (i32)*b;
    aMag += (i32)*a * (i32)*a;
    bMag += (i32)*b * (i32)*b;
    a++;
    b++;
    remaining--;
  }

  return 1.0f - ((f32)dot / (sqrtf((f32)aMag) * sqrtf((f32)bMag)));
}
# endif
/**
 * @brief Cosine distance between two int8 vectors. Uses the NEON kernel when
 * it is compiled in and the vectors hold more than 15 elements, otherwise
 * the scalar loop.
 *
 * @param a first vector
 * @param b second vector, same length as the first
 * @param d pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 distance_cosine_int8(const void *a, const void *b, const void *d) {
#if defined(SQLITE_VEC_ENABLE_NEON)
  const size_t n_elements = *(const size_t *)d;
  if (n_elements > 15) {
    return cosine_int8_neon(a, b, d);
  }
#endif
  return cosine_int8(a, b, d);
}
2024-04-20 13:38:58 -07:00
// https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34
2024-04-20 17:02:19 -07:00
// Population count of every byte value: hamdist_table[v] is the number of set
// bits in v. Indexing it with (x ^ y) gives the Hamming distance between the
// bytes x and y. The table is only ever read, so it is const.
// Laid out as 16 rows of 16: row r holds popcount(r) + popcount(low nibble).
static const u8 hamdist_table[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xA0
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xB0
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xC0
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xD0
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xE0
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, // 0xF0
};
2026-03-29 19:44:44 -07:00
# ifdef SQLITE_VEC_ENABLE_NEON
static f32 distance_hamming_neon ( const u8 * a , const u8 * b , size_t n_bytes ) {
const u8 * pEnd = a + n_bytes ;
uint32x4_t acc1 = vdupq_n_u32 ( 0 ) ;
uint32x4_t acc2 = vdupq_n_u32 ( 0 ) ;
uint32x4_t acc3 = vdupq_n_u32 ( 0 ) ;
uint32x4_t acc4 = vdupq_n_u32 ( 0 ) ;
while ( a < = pEnd - 64 ) {
uint8x16_t v1 = vld1q_u8 ( a ) ;
uint8x16_t v2 = vld1q_u8 ( b ) ;
acc1 = vaddq_u32 ( acc1 , vpaddlq_u16 ( vpaddlq_u8 ( vcntq_u8 ( veorq_u8 ( v1 , v2 ) ) ) ) ) ;
v1 = vld1q_u8 ( a + 16 ) ;
v2 = vld1q_u8 ( b + 16 ) ;
acc2 = vaddq_u32 ( acc2 , vpaddlq_u16 ( vpaddlq_u8 ( vcntq_u8 ( veorq_u8 ( v1 , v2 ) ) ) ) ) ;
v1 = vld1q_u8 ( a + 32 ) ;
v2 = vld1q_u8 ( b + 32 ) ;
acc3 = vaddq_u32 ( acc3 , vpaddlq_u16 ( vpaddlq_u8 ( vcntq_u8 ( veorq_u8 ( v1 , v2 ) ) ) ) ) ;
v1 = vld1q_u8 ( a + 48 ) ;
v2 = vld1q_u8 ( b + 48 ) ;
acc4 = vaddq_u32 ( acc4 , vpaddlq_u16 ( vpaddlq_u8 ( vcntq_u8 ( veorq_u8 ( v1 , v2 ) ) ) ) ) ;
a + = 64 ;
b + = 64 ;
}
while ( a < = pEnd - 16 ) {
uint8x16_t v1 = vld1q_u8 ( a ) ;
uint8x16_t v2 = vld1q_u8 ( b ) ;
acc1 = vaddq_u32 ( acc1 , vpaddlq_u16 ( vpaddlq_u8 ( vcntq_u8 ( veorq_u8 ( v1 , v2 ) ) ) ) ) ;
a + = 16 ;
b + = 16 ;
}
acc1 = vaddq_u32 ( acc1 , acc2 ) ;
acc3 = vaddq_u32 ( acc3 , acc4 ) ;
acc1 = vaddq_u32 ( acc1 , acc3 ) ;
u32 sum = vaddvq_u32 ( acc1 ) ;
while ( a < pEnd ) {
sum + = hamdist_table [ * a ^ * b ] ;
a + + ;
b + + ;
}
return ( f32 ) sum ;
}
# endif
2026-03-31 17:39:41 -07:00
# ifdef SQLITE_VEC_ENABLE_AVX
/**
* AVX2 Hamming distance using VPSHUFB - based popcount .
* Processes 32 bytes ( 256 bits ) per iteration .
*/
static f32 distance_hamming_avx2 ( const u8 * a , const u8 * b , size_t n_bytes ) {
const u8 * pEnd = a + n_bytes ;
// VPSHUFB lookup table: popcount of low nibble
const __m256i lookup = _mm256_setr_epi8 (
0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 ,
0 , 1 , 1 , 2 , 1 , 2 , 2 , 3 , 1 , 2 , 2 , 3 , 2 , 3 , 3 , 4 ) ;
const __m256i low_mask = _mm256_set1_epi8 ( 0x0f ) ;
__m256i acc = _mm256_setzero_si256 ( ) ;
while ( a < = pEnd - 32 ) {
__m256i va = _mm256_loadu_si256 ( ( const __m256i * ) a ) ;
__m256i vb = _mm256_loadu_si256 ( ( const __m256i * ) b ) ;
__m256i xored = _mm256_xor_si256 ( va , vb ) ;
// VPSHUFB popcount: split into nibbles, lookup each
__m256i lo = _mm256_and_si256 ( xored , low_mask ) ;
__m256i hi = _mm256_and_si256 ( _mm256_srli_epi16 ( xored , 4 ) , low_mask ) ;
__m256i popcnt = _mm256_add_epi8 ( _mm256_shuffle_epi8 ( lookup , lo ) ,
_mm256_shuffle_epi8 ( lookup , hi ) ) ;
// Horizontal sum: u8 -> u64 via sad against zero
acc = _mm256_add_epi64 ( acc , _mm256_sad_epu8 ( popcnt , _mm256_setzero_si256 ( ) ) ) ;
a + = 32 ;
b + = 32 ;
}
// Horizontal sum of 4 x u64 lanes
u64 tmp [ 4 ] ;
_mm256_storeu_si256 ( ( __m256i * ) tmp , acc ) ;
u32 sum = ( u32 ) ( tmp [ 0 ] + tmp [ 1 ] + tmp [ 2 ] + tmp [ 3 ] ) ;
// Scalar tail
while ( a < pEnd ) {
u8 x = * a ^ * b ;
x = x - ( ( x > > 1 ) & 0x55 ) ;
x = ( x & 0x33 ) + ( ( x > > 2 ) & 0x33 ) ;
sum + = ( x + ( x > > 4 ) ) & 0x0F ;
a + + ;
b + + ;
}
return ( f32 ) sum ;
}
# endif
2024-04-20 17:05:37 -07:00
static f32 distance_hamming_u8 ( u8 * a , u8 * b , size_t n ) {
2024-04-20 13:38:58 -07:00
int same = 0 ;
for ( unsigned long i = 0 ; i < n ; i + + ) {
same + = hamdist_table [ a [ i ] ^ b [ i ] ] ;
}
2024-04-20 17:05:37 -07:00
return ( f32 ) same ;
2024-04-20 13:38:58 -07:00
}
2024-06-09 14:53:12 +08:00
// MSVC has no __builtin_popcountl, so provide one under that name. It is
// called with the 64-bit XOR of two u64 words, so both variants must accept
// a full 64-bit argument.
#ifdef _MSC_VER
#if !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
// Portable bit-clearing loop, used for non-clang MSVC builds targeting ARM.
// Adapted from
// https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c,
// line 34-43
// The parameter is 64 bits wide: a 32-bit parameter would silently drop the
// upper half of every word and under-count the distance.
static unsigned int __builtin_popcountl(u64 x) {
  unsigned int c = 0;
  for (; x; ++c) {
    x &= x - 1; // clears the lowest set bit
  }
  return c;
}
#else
#include <intrin.h>
#define __builtin_popcountl __popcnt64
#endif
#endif
2024-06-09 14:53:12 +08:00
2026-03-31 14:57:01 -07:00
static f32 distance_hamming_u64 ( const u8 * a , const u8 * b , size_t n ) {
2024-04-20 13:38:58 -07:00
int same = 0 ;
for ( unsigned long i = 0 ; i < n ; i + + ) {
2026-03-31 14:57:01 -07:00
u64 va , vb ;
memcpy ( & va , a + i * sizeof ( u64 ) , sizeof ( u64 ) ) ;
memcpy ( & vb , b + i * sizeof ( u64 ) , sizeof ( u64 ) ) ;
same + = __builtin_popcountl ( va ^ vb ) ;
2024-04-20 13:38:58 -07:00
}
2024-04-20 17:05:37 -07:00
return ( f32 ) same ;
2024-04-20 13:38:58 -07:00
}
2024-05-26 20:54:37 -07:00
/**
* @ brief Calculate the hamming distance between two bitvectors .
*
* @ param a - first bitvector , MUST have d dimensions
* @ param b - second bitvector , MUST have d dimensions
* @ param d - pointer to size_t , MUST be divisible by CHAR_BIT
* @ return f32
*/
2024-04-20 17:05:37 -07:00
static f32 distance_hamming ( const void * a , const void * b , const void * d ) {
2024-04-20 13:38:58 -07:00
size_t dimensions = * ( ( size_t * ) d ) ;
2026-03-29 19:44:44 -07:00
size_t n_bytes = dimensions / CHAR_BIT ;
# ifdef SQLITE_VEC_ENABLE_NEON
if ( dimensions > = 128 ) {
return distance_hamming_neon ( ( const u8 * ) a , ( const u8 * ) b , n_bytes ) ;
}
# endif
2026-03-31 17:39:41 -07:00
# ifdef SQLITE_VEC_ENABLE_AVX
if ( n_bytes > = 32 ) {
return distance_hamming_avx2 ( ( const u8 * ) a , ( const u8 * ) b , n_bytes ) ;
}
# endif
2024-04-20 13:38:58 -07:00
if ( ( dimensions % 64 ) = = 0 ) {
2026-03-31 14:57:01 -07:00
return distance_hamming_u64 ( ( const u8 * ) a , ( const u8 * ) b , n_bytes / sizeof ( u64 ) ) ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
return distance_hamming_u8 ( ( u8 * ) a , ( u8 * ) b , n_bytes ) ;
2024-04-20 13:38:58 -07:00
}
2026-03-02 18:04:30 -08:00
#ifdef SQLITE_VEC_TEST
// Externally visible wrappers, compiled only when SQLITE_VEC_TEST is defined,
// that expose the static distance dispatchers. Each takes the dimension count
// by value and forwards its address, as the dispatchers expect.

/** L2 distance between two float32 vectors of `dims` elements. */
f32 _test_distance_l2_sqr_float(const f32 *a, const f32 *b, size_t dims) {
  const void *dims_ptr = &dims;
  return distance_l2_sqr_float(a, b, dims_ptr);
}

/** Cosine distance between two float32 vectors of `dims` elements. */
f32 _test_distance_cosine_float(const f32 *a, const f32 *b, size_t dims) {
  const void *dims_ptr = &dims;
  return distance_cosine_float(a, b, dims_ptr);
}

/** Hamming distance between two bitvectors of `dims` bits. */
f32 _test_distance_hamming(const u8 *a, const u8 *b, size_t dims) {
  const void *dims_ptr = &dims;
  return distance_hamming(a, b, dims_ptr);
}
#endif
2024-04-20 13:38:58 -07:00
// from SQLite source:
// https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153
2024-08-09 10:44:39 -07:00
// 256-entry lookup table indexed by byte value; an entry is 1 when that byte
// counts as whitespace here and 0 otherwise. The only entries set to 1 are
// indexes 9, 10, 13 and 32 (tab, line feed, carriage return, space).
static const char vecJsonIsSpaceX [ ] = {
2024-04-20 13:38:58 -07:00
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 0 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
} ;
2024-08-09 13:26:45 -04:00
2024-08-09 10:44:39 -07:00
// Non-zero when byte x is whitespace according to vecJsonIsSpaceX. The
// argument is parenthesized so the cast applies to the whole expression
// (e.g. vecJsonIsspace(p[0] + 1)), not just its first operand.
#define vecJsonIsspace(x) (vecJsonIsSpaceX[(unsigned char)(x)])
2024-04-20 13:38:58 -07:00
/**
 * Callback type that receives a single untyped pointer and returns nothing.
 * By its name, it is used to release storage backing a vector.
 */
typedef void (*vector_cleanup)(void *p);

/**
 * vector_cleanup-compatible callback that deliberately does nothing with the
 * pointer it receives.
 */
void vector_cleanup_noop(void *_) {
  UNUSED_PARAMETER(_);
}
// Subtype tag with value 74 (ASCII 'J').
// NOTE(review): presumably the subtype SQLite's JSON functions attach to
// their results -- confirm against the SQLite source.
# define JSON_SUBTYPE 74
2024-06-24 23:26:11 -07:00
/**
 * @brief Replace a virtual table's error message with a printf-style
 * formatted string.
 *
 * The new message is built BEFORE the previous one is released, so a caller
 * may safely pass the current pVTab->zErrMsg as one of the format arguments
 * (e.g. to add a prefix to it). If formatting fails, sqlite3_vmprintf()
 * returns NULL and zErrMsg ends up NULL.
 *
 * @param pVTab virtual table whose zErrMsg is replaced
 * @param zFormat format string understood by sqlite3_vmprintf()
 * @param ... arguments consumed by zFormat
 */
void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
  va_list args;
  va_start(args, zFormat);
  char *zNew = sqlite3_vmprintf(zFormat, args);
  va_end(args);

  sqlite3_free(pVTab->zErrMsg);
  pVTab->zErrMsg = zNew;
}
2024-04-20 13:38:58 -07:00
// Growable buffer of fixed-size elements, managed by array_init(),
// array_append() and array_cleanup().
struct Array {
// size in bytes of one element
size_t element_size ;
// number of elements currently stored
size_t length ;
// number of elements the allocation at z has room for
size_t capacity ;
// element storage, allocated with the sqlite3 allocator
void * z ;
} ;
2024-05-12 00:16:10 -07:00
/**
* @ brief Initial an array with the given element size and capacity .
*
* @ param array
* @ param element_size
* @ param init_capacity
2024-06-13 16:32:57 -07:00
* @ return SQLITE_OK on success , error code on failure . Only error is
* SQLITE_NOMEM
2024-05-12 00:16:10 -07:00
*/
2024-04-20 13:38:58 -07:00
int array_init ( struct Array * array , size_t element_size , size_t init_capacity ) {
2024-06-28 19:21:50 -07:00
int sz = element_size * init_capacity ;
void * z = sqlite3_malloc ( sz ) ;
2024-04-20 13:38:58 -07:00
if ( ! z ) {
return SQLITE_NOMEM ;
}
2024-06-28 19:21:50 -07:00
memset ( z , 0 , sz ) ;
2024-04-20 13:38:58 -07:00
array - > element_size = element_size ;
array - > length = 0 ;
array - > capacity = init_capacity ;
array - > z = z ;
return SQLITE_OK ;
}
int array_append ( struct Array * array , const void * element ) {
if ( array - > length = = array - > capacity ) {
size_t new_capacity = array - > capacity * 2 + 100 ;
void * z = sqlite3_realloc64 ( array - > z , array - > element_size * new_capacity ) ;
if ( z ) {
array - > capacity = new_capacity ;
array - > z = z ;
} else {
return SQLITE_NOMEM ;
}
}
2024-07-05 12:07:45 -07:00
memcpy ( & ( ( unsigned char * ) array - > z ) [ array - > length * array - > element_size ] ,
element , array - > element_size ) ;
2024-04-20 13:38:58 -07:00
array - > length + + ;
return SQLITE_OK ;
}
void array_cleanup ( struct Array * array ) {
2024-07-05 12:07:45 -07:00
if ( ! array )
return ;
2024-04-20 13:38:58 -07:00
array - > element_size = 0 ;
array - > length = 0 ;
array - > capacity = 0 ;
sqlite3_free ( array - > z ) ;
array - > z = NULL ;
}
2024-06-28 10:51:49 -07:00
/**
 * @brief Human-readable name of a vector element type / subtype value.
 *
 * @param subtype one of the SQLITE_VEC_ELEMENT_TYPE_* values
 * @return a static string ("float32", "int8" or "bit"); "" for any other value
 */
char *vector_subtype_name(int subtype) {
  if (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    return "float32";
  }
  if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    return "int8";
  }
  if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    return "bit";
  }
  return "";
}
/**
 * @brief Human-readable name of an SQLite fundamental datatype code.
 *
 * @param type value as returned by sqlite3_value_type()
 * @return a static string ("INTEGER", "BLOB", "TEXT", "FLOAT" or "NULL");
 * "" for any other value
 */
char *type_name(int type) {
  if (type == SQLITE_INTEGER) {
    return "INTEGER";
  }
  if (type == SQLITE_BLOB) {
    return "BLOB";
  }
  if (type == SQLITE_TEXT) {
    return "TEXT";
  }
  if (type == SQLITE_FLOAT) {
    return "FLOAT";
  }
  if (type == SQLITE_NULL) {
    return "NULL";
  }
  return "";
}
2026-03-03 07:16:33 -08:00
/* Destructor signature for float32 vector buffers. */
typedef void (*fvec_cleanup)(void *vector);

/* Destructor for float32 vectors that need no release: does nothing. */
void fvec_cleanup_noop(void *unused) { (void)(unused); }
2024-04-20 13:38:58 -07:00
2024-04-20 17:05:37 -07:00
/**
 * @brief Read a float32 vector out of a SQL value.
 *
 * Two encodings are accepted:
 *  - BLOB: raw f32 elements; the byte length must be a non-zero multiple of
 *    sizeof(f32).
 *  - TEXT: a '['-prefixed list of numbers separated by commas and/or
 *    whitespace, each parsed with strtod().
 * In both cases the elements end up in a freshly allocated buffer, and NaN or
 * infinite elements are rejected.
 *
 * @param value SQL value to read from
 * @param vector Output: heap buffer holding the elements (success only)
 * @param dimensions Output: number of elements in *vector (success only)
 * @param cleanup Output: set to sqlite3_free on success; call it on *vector
 * @param pzErr Output: written on every failure path with a
 * sqlite3_mprintf()-allocated message (NULL only if that allocation itself
 * fails); release with sqlite3_free()
 * @return SQLITE_OK on success, SQLITE_NOMEM on allocation failure,
 * SQLITE_ERROR on malformed or unsupported input
 */
static int fvec_from_value(sqlite3_value *value, f32 **vector,
                           size_t *dimensions, fvec_cleanup *cleanup,
                           char **pzErr) {
  int value_type = sqlite3_value_type(value);

  if (value_type == SQLITE_BLOB) {
    const void *blob = sqlite3_value_blob(value);
    int bytes = sqlite3_value_bytes(value);
    if (bytes == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }
    if ((bytes % sizeof(f32)) != 0) {
      /* sizeof yields size_t; cast so the argument really is the int that
       * %d consumes. */
      *pzErr = sqlite3_mprintf("invalid float32 vector BLOB length. Must be "
                               "divisible by %d, found %d",
                               (int)sizeof(f32), bytes);
      return SQLITE_ERROR;
    }
    f32 *buf = sqlite3_malloc(bytes);
    if (!buf) {
      *pzErr = sqlite3_mprintf("out of memory");
      return SQLITE_NOMEM;
    }
    memcpy(buf, blob, bytes);
    size_t n = bytes / sizeof(f32);
    for (size_t i = 0; i < n; i++) {
      if (isnan(buf[i]) || isinf(buf[i])) {
        *pzErr = sqlite3_mprintf("invalid float32 vector: element %d is %s",
                                 (int)i, isnan(buf[i]) ? "NaN" : "Inf");
        sqlite3_free(buf);
        return SQLITE_ERROR;
      }
    }
    *vector = buf;
    *dimensions = n;
    *cleanup = sqlite3_free;
    return SQLITE_OK;
  }

  if (value_type == SQLITE_TEXT) {
    const char *source = (const char *)sqlite3_value_text(value);
    int source_len = sqlite3_value_bytes(value);
    if (source_len == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }

    struct Array x;
    int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0));
    if (rc != SQLITE_OK) {
      /* Callers hand *pzErr to sqlite3_result_error()/sqlite3_free() on any
       * failure, so it must never be left unset. */
      *pzErr = sqlite3_mprintf("out of memory");
      return rc;
    }

    // advance leading whitespace to first '['
    int i = 0;
    while (i < source_len && vecJsonIsspace(source[i])) {
      i++;
    }
    /* When only whitespace was seen, source[i] is the terminating NUL. */
    if (source[i] != '[') {
      *pzErr = sqlite3_mprintf(
          "JSON array parsing error: Input does not start with '['");
      array_cleanup(&x);
      return SQLITE_ERROR;
    }

    int offset = i + 1;
    while (offset < source_len) {
      const char *ptr = &source[offset];
      char *endptr;

      errno = 0;
      double result = strtod(ptr, &endptr);
      if ((errno != 0 && result == 0) // some interval error?
          || (errno == ERANGE &&
              (result == HUGE_VAL || result == -HUGE_VAL)) // too big / smalls
      ) {
        array_cleanup(&x);
        *pzErr = sqlite3_mprintf("JSON parsing error");
        return SQLITE_ERROR;
      }

      if (endptr == ptr) {
        /* Nothing numeric here: only a closing bracket is acceptable. */
        if (*ptr != ']') {
          array_cleanup(&x);
          *pzErr = sqlite3_mprintf("JSON parsing error");
          return SQLITE_ERROR;
        }
        goto done;
      }

      f32 res = (f32)result;
      if (isnan(res) || isinf(res)) {
        /* Format first: array_cleanup() resets x.length. */
        *pzErr = sqlite3_mprintf("invalid float32 vector: element %d is %s",
                                 (int)x.length, isnan(res) ? "NaN" : "Inf");
        array_cleanup(&x);
        return SQLITE_ERROR;
      }

      rc = array_append(&x, (const void *)&res);
      if (rc != SQLITE_OK) {
        /* Previously ignored: a failed grow silently dropped the element. */
        array_cleanup(&x);
        *pzErr = sqlite3_mprintf("out of memory");
        return rc;
      }

      offset += (int)(endptr - ptr);
      /* Skip separators up to the next element or the closing bracket. */
      while (offset < source_len) {
        if (vecJsonIsspace(source[offset]) || source[offset] == ',') {
          offset++;
          continue;
        }
        if (source[offset] == ']') {
          goto done;
        }
        break;
      }
    }

  done:
    if (x.length > 0) {
      *vector = (f32 *)x.z;
      *dimensions = x.length;
      *cleanup = sqlite3_free;
      return SQLITE_OK;
    }
    array_cleanup(&x);
    *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
    return SQLITE_ERROR;
  }

  *pzErr = sqlite3_mprintf(
      "Input must have type BLOB (compact format) or TEXT (JSON), found %s",
      type_name(value_type));
  return SQLITE_ERROR;
}
2024-04-20 17:02:19 -07:00
/**
 * @brief Read a bit vector out of a SQL value.
 *
 * Only non-empty BLOBs are accepted. The returned pointer is the value's own
 * blob (no copy is made), so *cleanup is the no-op destructor.
 *
 * @param value SQL value to read from
 * @param vector Output: pointer to the blob bytes (success only)
 * @param dimensions Output: number of bits, i.e. bytes * CHAR_BIT
 * @param cleanup Output: set to vector_cleanup_noop on success
 * @param pzErr Output: sqlite3_mprintf()-allocated message on failure
 * @return SQLITE_OK on success, SQLITE_ERROR otherwise
 */
static int bitvec_from_value(sqlite3_value *value, u8 **vector,
                             size_t *dimensions, vector_cleanup *cleanup,
                             char **pzErr) {
  if (sqlite3_value_type(value) != SQLITE_BLOB) {
    *pzErr = sqlite3_mprintf("Unknown type for bitvector.");
    return SQLITE_ERROR;
  }

  const void *blob = sqlite3_value_blob(value);
  int nBytes = sqlite3_value_bytes(value);
  if (nBytes == 0) {
    *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
    return SQLITE_ERROR;
  }

  *vector = (u8 *)blob;
  *dimensions = nBytes * CHAR_BIT;
  *cleanup = vector_cleanup_noop;
  return SQLITE_OK;
}
2024-04-20 17:02:19 -07:00
/**
 * @brief Read an int8 vector out of a SQL value.
 *
 * Two encodings are accepted:
 *  - BLOB: one byte per element. The value's own blob is returned without
 *    copying, so *cleanup is the no-op destructor.
 *  - TEXT: a '['-prefixed list of base-10 integers separated by commas and/or
 *    whitespace, each within [INT8_MIN, INT8_MAX]. The elements are stored in
 *    a heap buffer and *cleanup is sqlite3_free.
 *
 * @param value SQL value to read from
 * @param vector Output: pointer to the elements (success only)
 * @param dimensions Output: number of elements (success only)
 * @param cleanup Output: destructor to call on *vector (success only)
 * @param pzErr Output: written on every failure path with a
 * sqlite3_mprintf()-allocated message (NULL only if that allocation itself
 * fails); release with sqlite3_free()
 * @return SQLITE_OK on success, SQLITE_NOMEM on allocation failure,
 * SQLITE_ERROR on malformed or unsupported input
 */
static int int8_vec_from_value(sqlite3_value *value, i8 **vector,
                               size_t *dimensions, vector_cleanup *cleanup,
                               char **pzErr) {
  int value_type = sqlite3_value_type(value);

  if (value_type == SQLITE_BLOB) {
    const void *blob = sqlite3_value_blob(value);
    int bytes = sqlite3_value_bytes(value);
    if (bytes == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }
    *vector = (i8 *)blob;
    *dimensions = bytes;
    *cleanup = vector_cleanup_noop;
    return SQLITE_OK;
  }

  if (value_type == SQLITE_TEXT) {
    const char *source = (const char *)sqlite3_value_text(value);
    int source_len = sqlite3_value_bytes(value);
    if (source_len == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }

    struct Array x;
    int rc = array_init(&x, sizeof(i8), ceil(source_len / 2.0));
    if (rc != SQLITE_OK) {
      /* Callers hand *pzErr to sqlite3_result_error()/sqlite3_free() on any
       * failure, so it must never be left unset. */
      *pzErr = sqlite3_mprintf("out of memory");
      return rc;
    }

    // advance leading whitespace to first '['
    int i = 0;
    while (i < source_len && vecJsonIsspace(source[i])) {
      i++;
    }
    /* When only whitespace was seen, source[i] is the terminating NUL. */
    if (source[i] != '[') {
      *pzErr = sqlite3_mprintf(
          "JSON array parsing error: Input does not start with '['");
      array_cleanup(&x);
      return SQLITE_ERROR;
    }

    int offset = i + 1;
    while (offset < source_len) {
      const char *ptr = &source[offset];
      char *endptr;

      errno = 0;
      long result = strtol(ptr, &endptr, 10);
      if ((errno != 0 && result == 0) ||
          (errno == ERANGE && (result == LONG_MAX || result == LONG_MIN))) {
        array_cleanup(&x);
        *pzErr = sqlite3_mprintf("JSON parsing error");
        return SQLITE_ERROR;
      }

      if (endptr == ptr) {
        /* Nothing numeric here: only a closing bracket is acceptable. */
        if (*ptr != ']') {
          array_cleanup(&x);
          *pzErr = sqlite3_mprintf("JSON parsing error");
          return SQLITE_ERROR;
        }
        goto done;
      }

      if (result < INT8_MIN || result > INT8_MAX) {
        array_cleanup(&x);
        *pzErr =
            sqlite3_mprintf("JSON parsing error: value out of range for int8");
        return SQLITE_ERROR;
      }

      i8 res = (i8)result;
      rc = array_append(&x, (const void *)&res);
      if (rc != SQLITE_OK) {
        /* Previously ignored: a failed grow silently dropped the element. */
        array_cleanup(&x);
        *pzErr = sqlite3_mprintf("out of memory");
        return rc;
      }

      offset += (int)(endptr - ptr);
      /* Skip separators up to the next element or the closing bracket. */
      while (offset < source_len) {
        if (vecJsonIsspace(source[offset]) || source[offset] == ',') {
          offset++;
          continue;
        }
        if (source[offset] == ']') {
          goto done;
        }
        break;
      }
    }

  done:
    if (x.length > 0) {
      *vector = (i8 *)x.z;
      *dimensions = x.length;
      *cleanup = (vector_cleanup)sqlite3_free;
      return SQLITE_OK;
    }
    array_cleanup(&x);
    *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
    return SQLITE_ERROR;
  }

  *pzErr = sqlite3_mprintf("Unknown type for int8 vector.");
  return SQLITE_ERROR;
}
/**
 * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit
 * vector.
 *
 * The value's subtype selects the reader: no subtype, the float32 subtype and
 * the JSON subtype are all read as float32; the bit and int8 subtypes use
 * their own readers. Any other subtype is an error.
 *
 * @param value: the sqlite3_value to read from.
 * @param vector: Output pointer to vector data.
 * @param dimensions: Output number of dimensions
 * @param element_type: Output vector element type (written on success only)
 * @param cleanup: Output destructor to call on *vector when done
 * @param pzErrorMessage: Output error message, to be released with
 * sqlite3_free()
 * @return int SQLITE_OK on success, error code otherwise
 */
int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions,
                      enum VectorElementType *element_type,
                      vector_cleanup *cleanup, char **pzErrorMessage) {
  int rc;
  int subtype = sqlite3_value_subtype(value);

  if (subtype == 0 || subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32 ||
      subtype == JSON_SUBTYPE) {
    rc = fvec_from_value(value, (f32 **)vector, dimensions,
                         (fvec_cleanup *)cleanup, pzErrorMessage);
    if (rc == SQLITE_OK) {
      *element_type = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
    }
    return rc;
  }

  if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup,
                           pzErrorMessage);
    if (rc == SQLITE_OK) {
      *element_type = SQLITE_VEC_ELEMENT_TYPE_BIT;
    }
    return rc;
  }

  if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup,
                             pzErrorMessage);
    if (rc == SQLITE_OK) {
      *element_type = SQLITE_VEC_ELEMENT_TYPE_INT8;
    }
    return rc;
  }

  *pzErrorMessage = sqlite3_mprintf("Unknown subtype: %d", subtype);
  return SQLITE_ERROR;
}
/**
 * @brief Read two vectors and verify they have the same element type and the
 * same number of dimensions.
 *
 * On failure any vector that was already read is released here, *outError
 * receives a sqlite3_mprintf()-allocated message, and the cleanup/type/
 * dimension outputs are not written.
 *
 * @param aValue SQL value holding the first vector
 * @param bValue SQL value holding the second vector
 * @param a Output: first vector's data
 * @param b Output: second vector's data
 * @param element_type Output: shared element type (success only)
 * @param dimensions Output: shared dimension count (success only)
 * @param outACleanup Output: destructor for *a (success only)
 * @param outBCleanup Output: destructor for *b (success only)
 * @param outError Output: error message on failure; free with sqlite3_free()
 * @return SQLITE_OK on success, SQLITE_ERROR on any failure
 */
int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a,
                        void **b, enum VectorElementType *element_type,
                        size_t *dimensions, vector_cleanup *outACleanup,
                        vector_cleanup *outBCleanup, char **outError) {
  int rc;
  enum VectorElementType aType, bType;
  size_t aDims, bDims;
  char *error = NULL;
  vector_cleanup aCleanup, bCleanup;

  rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error);
  if (rc != SQLITE_OK) {
    *outError = sqlite3_mprintf("Error reading 1st vector: %s", error);
    sqlite3_free(error);
    return SQLITE_ERROR;
  }

  rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    *outError = sqlite3_mprintf("Error reading 2nd vector: %s", error);
    sqlite3_free(error);
    aCleanup(*a);
    return SQLITE_ERROR;
  }

  if (aType != bType) {
    *outError =
        sqlite3_mprintf("Vector type mistmatch. First vector has type %s, "
                        "while the second has type %s.",
                        vector_subtype_name(aType), vector_subtype_name(bType));
    aCleanup(*a);
    bCleanup(*b);
    return SQLITE_ERROR;
  }

  if (aDims != bDims) {
    /* size_t is not `long` on every platform (e.g. LLP64), so the counts are
     * widened to i64 and printed with %lld; the rendered text is unchanged. */
    *outError = sqlite3_mprintf(
        "Vector dimension mistmatch. First vector has %lld dimensions, "
        "while the second has %lld dimensions.",
        (i64)aDims, (i64)bDims);
    aCleanup(*a);
    bCleanup(*b);
    return SQLITE_ERROR;
  }

  *element_type = aType;
  *dimensions = aDims;
  *outACleanup = aCleanup;
  *outBCleanup = bCleanup;
  return SQLITE_OK;
}
2024-04-20 17:02:19 -07:00
int _cmp ( const void * a , const void * b ) { return ( * ( i64 * ) a - * ( i64 * ) b ) ; }
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
# pragma region scalar functions
2024-04-20 13:38:58 -07:00
/**
 * @brief SQL scalar vec_f32(X): return X as a float32 vector BLOB tagged with
 * the float32 subtype.
 *
 * Ownership of the parsed buffer is handed to SQLite together with its
 * destructor, so it is not released here on success.
 */
static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  int rc;
  f32 *vector = NULL;
  size_t dimensions;
  fvec_cleanup cleanup;
  /* NULL-initialised so a failure that produced no message is detectable
   * instead of reading an indeterminate pointer. */
  char *errmsg = NULL;

  rc = fvec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
  if (rc != SQLITE_OK) {
    if (errmsg) {
      sqlite3_result_error(context, errmsg, -1);
      sqlite3_free(errmsg);
    } else {
      sqlite3_result_error_code(context, rc);
    }
    return;
  }

  sqlite3_result_blob(context, vector, dimensions * sizeof(f32),
                      (void (*)(void *))cleanup);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
}
2024-06-24 23:26:11 -07:00
2024-04-20 13:38:58 -07:00
/**
 * @brief SQL scalar vec_bit(X): return X as a bit vector BLOB tagged with the
 * bit subtype.
 *
 * The bytes are copied by SQLite (SQLITE_TRANSIENT) before the source vector
 * is released.
 */
static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  u8 *bits;
  size_t nBits;
  vector_cleanup release;
  char *zErr;

  int rc = bitvec_from_value(argv[0], &bits, &nBits, &release, &zErr);
  if (rc == SQLITE_OK) {
    sqlite3_result_blob(context, bits, nBits / CHAR_BIT, SQLITE_TRANSIENT);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
    release(bits);
    return;
  }

  sqlite3_result_error(context, zErr, -1);
  sqlite3_free(zErr);
}
/**
 * @brief SQL scalar vec_int8(X): return X as an int8 vector BLOB tagged with
 * the int8 subtype.
 *
 * The bytes are copied by SQLite (SQLITE_TRANSIENT) before the source vector
 * is released.
 */
static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  int rc;
  i8 *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  /* NULL-initialised so a failure that produced no message is detectable
   * instead of reading an indeterminate pointer. */
  char *errmsg = NULL;

  rc = int8_vec_from_value(argv[0], &vector, &dimensions, &cleanup, &errmsg);
  if (rc != SQLITE_OK) {
    if (errmsg) {
      sqlite3_result_error(context, errmsg, -1);
      sqlite3_free(errmsg);
    } else {
      sqlite3_result_error_code(context, rc);
    }
    return;
  }

  sqlite3_result_blob(context, vector, dimensions, SQLITE_TRANSIENT);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  cleanup(vector);
}
/**
 * @brief SQL scalar vec_length(X): return the number of dimensions of the
 * vector X as an integer.
 */
static void vec_length(sqlite3_context *context, int argc,
                       sqlite3_value **argv) {
  assert(argc == 1);
  int rc;
  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  /* NULL-initialised so a failure that produced no message is detectable
   * instead of reading an indeterminate pointer. */
  char *errmsg = NULL;
  enum VectorElementType elementType;

  rc = vector_from_value(argv[0], &vector, &dimensions, &elementType, &cleanup,
                         &errmsg);
  if (rc != SQLITE_OK) {
    if (errmsg) {
      sqlite3_result_error(context, errmsg, -1);
      sqlite3_free(errmsg);
    } else {
      sqlite3_result_error_code(context, rc);
    }
    return;
  }

  sqlite3_result_int64(context, dimensions);
  cleanup(vector);
}
/**
 * @brief SQL scalar vec_distance_cosine(A, B): cosine distance between two
 * vectors of matching type and length.
 *
 * Supported for float32 and int8 vectors; bit vectors yield an error result.
 */
static void vec_distance_cosine(sqlite3_context *context, int argc,
                                sqlite3_value **argv) {
  assert(argc == 2);
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;

  int rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType,
                               &dimensions, &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_error(
        context, "Cannot calculate cosine distance between two bitvectors.",
        -1);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    f32 dist = distance_cosine_float(a, b, &dimensions);
    sqlite3_result_double(context, dist);
    break;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    f32 dist = distance_cosine_int8(a, b, &dimensions);
    sqlite3_result_double(context, dist);
    break;
  }
  }

  /* Both inputs are released on every path that got past the match check. */
  aCleanup(a);
  bCleanup(b);
}
/**
 * @brief SQL scalar vec_distance_l2(A, B): L2 distance between two vectors of
 * matching type and length, as computed by the distance_l2_sqr_* helpers.
 *
 * Supported for float32 and int8 vectors; bit vectors yield an error result.
 */
static void vec_distance_l2(sqlite3_context *context, int argc,
                            sqlite3_value **argv) {
  assert(argc == 2);
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;

  int rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType,
                               &dimensions, &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_error(
        context, "Cannot calculate L2 distance between two bitvectors.", -1);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    f32 dist = distance_l2_sqr_float(a, b, &dimensions);
    sqlite3_result_double(context, dist);
    break;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    f32 dist = distance_l2_sqr_int8(a, b, &dimensions);
    sqlite3_result_double(context, dist);
    break;
  }
  }

  /* Both inputs are released on every path that got past the match check. */
  aCleanup(a);
  bCleanup(b);
}
2024-07-23 12:04:15 -04:00
/**
 * @brief SQL scalar vec_distance_l1(A, B): L1 distance between two vectors of
 * matching type and length.
 *
 * float32 inputs produce a floating-point result and int8 inputs an integer
 * result; bit vectors yield an error result.
 */
static void vec_distance_l1(sqlite3_context *context, int argc,
                            sqlite3_value **argv) {
  assert(argc == 2);
  int rc;
  /* Initialised like the sibling distance functions. */
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;

  rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
                           &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT: {
    sqlite3_result_error(
        context, "Cannot calculate L1 distance between two bitvectors.", -1);
    goto finish;
  }
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    double result = distance_l1_f32(a, b, &dimensions);
    sqlite3_result_double(context, result);
    goto finish;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    i64 result = distance_l1_int8(a, b, &dimensions);
    /* The distance is 64-bit; sqlite3_result_int() would truncate it. */
    sqlite3_result_int64(context, result);
    goto finish;
  }
  }

finish:
  aCleanup(a);
  bCleanup(b);
  return;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief SQL scalar vec_distance_hamming(A, B): hamming distance between two
 * bit vectors of matching length.
 *
 * float32 and int8 vectors yield an error result.
 */
static void vec_distance_hamming(sqlite3_context *context, int argc,
                                 sqlite3_value **argv) {
  assert(argc == 2);
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;

  int rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType,
                               &dimensions, &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_double(context, distance_hamming(a, b, &dimensions));
    break;
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
    sqlite3_result_error(
        context,
        "Cannot calculate hamming distance between two float32 vectors.", -1);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_INT8:
    sqlite3_result_error(
        context, "Cannot calculate hamming distance between two int8 vectors.",
        -1);
    break;
  }

  /* Both inputs are released on every path that got past the match check. */
  aCleanup(a);
  bCleanup(b);
}
2024-07-23 23:57:42 -07:00
/**
 * @brief Human-readable name of a vector element type.
 *
 * @param elementType element type to describe
 * @return a static string ("float32", "int8" or "bit"); "" for any other value
 */
char *vec_type_name(enum VectorElementType elementType) {
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    return "float32";
  }
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    return "int8";
  }
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    return "bit";
  }
  return "";
}
2024-07-23 23:57:42 -07:00
/**
 * @brief SQL scalar vec_type(X): return the element type name of the vector X
 * ("float32", "int8" or "bit") as text.
 */
static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  /* NULL-initialised so a failure that produced no message is detectable
   * instead of reading an indeterminate pointer. */
  char *pzError = NULL;
  enum VectorElementType elementType;

  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &cleanup, &pzError);
  if (rc != SQLITE_OK) {
    if (pzError) {
      sqlite3_result_error(context, pzError, -1);
      sqlite3_free(pzError);
    } else {
      sqlite3_result_error_code(context, rc);
    }
    return;
  }

  /* The name is a string literal, so SQLite may reference it directly. */
  sqlite3_result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC);
  cleanup(vector);
}
/**
 * @brief SQL scalar vec_quantize_binary(X): quantize a float32 or int8 vector
 * into a bit vector.
 *
 * Output bit i (bit i % 8 of byte i / 8) is set when element i is strictly
 * greater than zero. The input length must be a non-zero multiple of 8; bit
 * vector inputs are rejected. The result BLOB carries the bit subtype.
 */
static void vec_quantize_binary(sqlite3_context *context, int argc,
                                sqlite3_value **argv) {
  assert(argc == 1);
  void *vector;
  size_t dimensions;
  vector_cleanup vectorCleanup;
  /* NULL-initialised so a failure that produced no message is detectable
   * instead of reading an indeterminate pointer. */
  char *pzError = NULL;
  enum VectorElementType elementType;
  int sz = 0;
  u8 *out = NULL;

  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &vectorCleanup, &pzError);
  if (rc != SQLITE_OK) {
    if (pzError) {
      sqlite3_result_error(context, pzError, -1);
      sqlite3_free(pzError);
    } else {
      sqlite3_result_error_code(context, rc);
    }
    return;
  }

  if (dimensions == 0) {
    sqlite3_result_error(context, "Zero length vectors are not supported.", -1);
    goto cleanup;
  }
  if ((dimensions % CHAR_BIT) != 0) {
    sqlite3_result_error(
        context,
        "Binary quantization requires vectors with a length divisible by 8",
        -1);
    goto cleanup;
  }

  sz = dimensions / CHAR_BIT;
  out = sqlite3_malloc(sz);
  if (!out) {
    sqlite3_result_error_code(context, SQLITE_NOMEM);
    goto cleanup;
  }
  memset(out, 0, sz);

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    for (size_t i = 0; i < dimensions; i++) {
      int res = ((f32 *)vector)[i] > 0.0;
      out[i / 8] |= (res << (i % 8));
    }
    break;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    for (size_t i = 0; i < dimensions; i++) {
      int res = ((i8 *)vector)[i] > 0;
      out[i / 8] |= (res << (i % 8));
    }
    break;
  }
  case SQLITE_VEC_ELEMENT_TYPE_BIT: {
    sqlite3_result_error(context,
                         "Can only binary quantize float or int8 vectors", -1);
    sqlite3_free(out);
    /* Leave through the common exit so the input is always released. */
    goto cleanup;
  }
  }

  /* SQLite takes ownership of `out` and frees it with sqlite3_free. */
  sqlite3_result_blob(context, out, sz, sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);

cleanup:
  vectorCleanup(vector);
}
static void vec_quantize_int8 ( sqlite3_context * context , int argc ,
2024-07-23 23:57:42 -07:00
sqlite3_value * * argv ) {
2024-07-22 21:24:44 -07:00
assert ( argc = = 2 ) ;
2024-04-20 17:05:37 -07:00
f32 * srcVector ;
2024-04-20 13:38:58 -07:00
size_t dimensions ;
2024-07-05 12:07:45 -07:00
fvec_cleanup srcCleanup ;
2024-04-20 13:38:58 -07:00
char * err ;
2024-07-05 12:07:45 -07:00
i8 * out = NULL ;
int rc = fvec_from_value ( argv [ 0 ] , & srcVector , & dimensions , & srcCleanup , & err ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_result_error ( context , err , - 1 ) ;
sqlite3_free ( err ) ;
return ;
}
2024-06-28 19:21:50 -07:00
int sz = dimensions * sizeof ( i8 ) ;
2024-07-05 12:07:45 -07:00
out = sqlite3_malloc ( sz ) ;
if ( ! out ) {
2024-07-22 21:24:44 -07:00
sqlite3_result_error_nomem ( context ) ;
2024-07-05 12:07:45 -07:00
goto cleanup ;
}
2024-06-28 19:21:50 -07:00
memset ( out , 0 , sz ) ;
2024-04-20 13:38:58 -07:00
2024-07-22 21:24:44 -07:00
if ( ( sqlite3_value_type ( argv [ 1 ] ) ! = SQLITE_TEXT ) | |
( sqlite3_value_bytes ( argv [ 1 ] ) ! = strlen ( " unit " ) ) | |
( sqlite3_stricmp ( ( const char * ) sqlite3_value_text ( argv [ 1 ] ) , " unit " ) ! =
2024-07-23 23:57:42 -07:00
0 ) ) {
sqlite3_result_error (
2024-08-01 02:45:51 -07:00
context , " 2nd argument to vec_quantize_int8() must be 'unit'. " , - 1 ) ;
2024-07-05 12:07:45 -07:00
sqlite3_free ( out ) ;
goto cleanup ;
2024-04-20 13:38:58 -07:00
}
2024-07-22 21:24:44 -07:00
f32 step = ( 1.0 - ( - 1.0 ) ) / 255 ;
for ( size_t i = 0 ; i < dimensions ; i + + ) {
2026-03-03 07:16:33 -08:00
double val = ( ( srcVector [ i ] - ( - 1.0 ) ) / step ) - 128 ;
2026-03-03 08:36:59 -08:00
if ( ! ( val < = 127.0 ) ) val = 127.0 ; /* also clamps NaN */
if ( ! ( val > = - 128.0 ) ) val = - 128.0 ;
2026-03-03 07:16:33 -08:00
out [ i ] = ( i8 ) val ;
2024-07-22 21:24:44 -07:00
}
2024-04-20 13:38:58 -07:00
2024-04-20 17:02:19 -07:00
sqlite3_result_blob ( context , out , dimensions * sizeof ( i8 ) , sqlite3_free ) ;
2024-04-20 13:38:58 -07:00
sqlite3_result_subtype ( context , SQLITE_VEC_ELEMENT_TYPE_INT8 ) ;
2024-07-05 12:07:45 -07:00
cleanup :
srcCleanup ( srcVector ) ;
2024-04-20 13:38:58 -07:00
}
/**
 * @brief SQL scalar vec_add(A, B): element-wise sum of two vectors of matching
 * type and length.
 *
 * Supported for float32 and int8 vectors; the result BLOB carries the same
 * subtype as the inputs. Bit vectors yield an error result.
 */
static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 2);
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;

  int rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType,
                               &dimensions, &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_error(context, "Cannot add two bitvectors together.", -1);
    break;

  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    const f32 *lhs = (const f32 *)a;
    const f32 *rhs = (const f32 *)b;
    size_t outSize = dimensions * sizeof(f32);
    f32 *sum = sqlite3_malloc(outSize);
    if (!sum) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(sum, 0, outSize);
    for (size_t k = 0; k < dimensions; k++) {
      sum[k] = lhs[k] + rhs[k];
    }
    /* SQLite takes ownership of `sum`. */
    sqlite3_result_blob(context, sum, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
    break;
  }

  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    const i8 *lhs = (const i8 *)a;
    const i8 *rhs = (const i8 *)b;
    size_t outSize = dimensions * sizeof(i8);
    i8 *sum = sqlite3_malloc(outSize);
    if (!sum) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(sum, 0, outSize);
    for (size_t k = 0; k < dimensions; k++) {
      sum[k] = lhs[k] + rhs[k];
    }
    /* SQLite takes ownership of `sum`. */
    sqlite3_result_blob(context, sum, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
    break;
  }
  }

  /* Both inputs are released on every path that got past the match check. */
  aCleanup(a);
  bCleanup(b);
}
/**
 * @brief SQL scalar vec_sub(A, B): element-wise difference (A - B) of two
 * vectors of matching type and length.
 *
 * Supported for float32 and int8 vectors; the result BLOB carries the same
 * subtype as the inputs. Bit vectors yield an error result.
 */
static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 2);
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;

  int rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType,
                               &dimensions, &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_error(context, "Cannot subtract two bitvectors together.",
                         -1);
    break;

  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    const f32 *lhs = (const f32 *)a;
    const f32 *rhs = (const f32 *)b;
    size_t outSize = dimensions * sizeof(f32);
    f32 *diff = sqlite3_malloc(outSize);
    if (!diff) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(diff, 0, outSize);
    for (size_t k = 0; k < dimensions; k++) {
      diff[k] = lhs[k] - rhs[k];
    }
    /* SQLite takes ownership of `diff`. */
    sqlite3_result_blob(context, diff, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
    break;
  }

  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    const i8 *lhs = (const i8 *)a;
    const i8 *rhs = (const i8 *)b;
    size_t outSize = dimensions * sizeof(i8);
    i8 *diff = sqlite3_malloc(outSize);
    if (!diff) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(diff, 0, outSize);
    for (size_t k = 0; k < dimensions; k++) {
      diff[k] = lhs[k] - rhs[k];
    }
    /* SQLite takes ownership of `diff`. */
    sqlite3_result_blob(context, diff, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
    break;
  }
  }

  /* Both inputs are released on every path that got past the match check. */
  aCleanup(a);
  bCleanup(b);
}
/**
 * SQL scalar function: vec_slice(vector, start, end).
 *
 * Returns a new vector holding the elements of `vector` in the half-open
 * range [start, end), with the same element type (result subtype) as the
 * input. Errors are reported on `context` when:
 *  - the first argument cannot be parsed as a vector,
 *  - start or end is negative, or greater than the number of dimensions,
 *  - start >= end (empty slices are rejected),
 *  - for bit vectors, start or end is not a multiple of CHAR_BIT,
 *  - the output buffer cannot be allocated.
 *
 * The parsed input vector is released through its cleanup callback on every
 * path after a successful parse, including the out-of-memory path.
 */
static void vec_slice(sqlite3_context *context, int argc,
                      sqlite3_value **argv) {
  assert(argc == 3);

  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  char *err;
  enum VectorElementType elementType;

  // Declared up front so that no `goto done` jumps over an initializer.
  size_t n = 0;         // number of elements (or bits) in the slice
  size_t srcOffset = 0; // byte offset of the slice inside `vector`
  size_t outSize = 0;   // byte size of the resulting blob
  void *out = NULL;

  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &cleanup, &err);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, err, -1);
    sqlite3_free(err);
    return;
  }

  int start = sqlite3_value_int(argv[1]);
  int end = sqlite3_value_int(argv[2]);

  if (start < 0) {
    sqlite3_result_error(context,
                         "slice 'start' index must be a postive number.", -1);
    goto done;
  }
  if (end < 0) {
    sqlite3_result_error(context, "slice 'end' index must be a postive number.",
                         -1);
    goto done;
  }
  if (((size_t)start) > dimensions) {
    sqlite3_result_error(
        context, "slice 'start' index is greater than the number of dimensions",
        -1);
    goto done;
  }
  if (((size_t)end) > dimensions) {
    sqlite3_result_error(
        context, "slice 'end' index is greater than the number of dimensions",
        -1);
    goto done;
  }
  if (start > end) {
    sqlite3_result_error(context,
                         "slice 'start' index is greater than 'end' index", -1);
    goto done;
  }
  if (start == end) {
    sqlite3_result_error(context,
                         "slice 'start' index is equal to the 'end' index, "
                         "vectors must have non-zero length",
                         -1);
    goto done;
  }

  n = (size_t)(end - start);

  // Translate the element range into a byte range for the element type.
  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
    srcOffset = (size_t)start * sizeof(f32);
    outSize = n * sizeof(f32);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_INT8:
    srcOffset = (size_t)start * sizeof(i8);
    outSize = n * sizeof(i8);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    // Bit vectors are copied byte-wise, so both ends must sit on a byte
    // boundary.
    if ((start % CHAR_BIT) != 0) {
      sqlite3_result_error(context, "start index must be divisible by 8.", -1);
      goto done;
    }
    if ((end % CHAR_BIT) != 0) {
      sqlite3_result_error(context, "end index must be divisible by 8.", -1);
      goto done;
    }
    srcOffset = (size_t)start / CHAR_BIT;
    outSize = n / CHAR_BIT;
    break;
  default:
    // Unknown element type: leave the result unset, as before.
    goto done;
  }

  out = sqlite3_malloc((int)outSize);
  if (!out) {
    sqlite3_result_error_nomem(context);
    // Previously the int8 and bit paths returned here without releasing
    // `vector`; always go through the shared cleanup.
    goto done;
  }
  memcpy(out, (const u8 *)vector + srcOffset, outSize);

  // Ownership of `out` passes to SQLite, which frees it with sqlite3_free.
  sqlite3_result_blob(context, out, (int)outSize, sqlite3_free);
  sqlite3_result_subtype(context, elementType);

done:
  cleanup(vector);
}
/**
 * SQL scalar function: vec_to_json(vector).
 *
 * Renders the vector as JSON array text and tags the result with
 * JSON_SUBTYPE. float32 elements are printed with "%f" (NaN becomes `null`),
 * int8 elements with "%d", and bit vectors as one 0/1 entry per bit.
 * Reports an error on `context` if the argument is not a valid vector or if
 * the text buffer could not be built.
 */
static void vec_to_json(sqlite3_context *context, int argc,
                        sqlite3_value **argv) {
  assert(argc == 1);

  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  char *err;
  enum VectorElementType elementType;

  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &cleanup, &err);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, err, -1);
    sqlite3_free(err);
    return;
  }

  sqlite3_str *json = sqlite3_str_new(sqlite3_context_db_handle(context));
  sqlite3_str_appendall(json, "[");

  for (size_t idx = 0; idx < dimensions; idx++) {
    if (idx > 0) {
      sqlite3_str_appendall(json, ",");
    }

    switch (elementType) {
    case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
      f32 value = ((f32 *)vector)[idx];
      if (isnan(value)) {
        // JSON has no NaN literal.
        sqlite3_str_appendall(json, "null");
      } else {
        sqlite3_str_appendf(json, "%f", value);
      }
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8:
      sqlite3_str_appendf(json, "%d", ((i8 *)vector)[idx]);
      break;
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
      // Bit `idx` lives in byte idx/8, at position idx%CHAR_BIT counted from
      // the least-significant bit.
      u8 bit = (((u8 *)vector)[idx / 8] >> (idx % CHAR_BIT)) & 1;
      sqlite3_str_appendf(json, "%d", bit);
      break;
    }
    default:
      break;
    }
  }

  sqlite3_str_appendall(json, "]");

  // The length must be read before sqlite3_str_finish() consumes the builder.
  int textLength = sqlite3_str_length(json);
  char *text = sqlite3_str_finish(json);
  if (!text) {
    sqlite3_result_error_nomem(context);
  } else {
    sqlite3_result_text(context, text, textLength, sqlite3_free);
    sqlite3_result_subtype(context, JSON_SUBTYPE);
  }

  cleanup(vector);
}
/**
 * SQL scalar function: vec_normalize(vector).
 *
 * Divides every element of a float32 vector by the vector's Euclidean norm
 * and returns the result as a float32 blob. Non-float32 inputs are rejected
 * with an error.
 *
 * NOTE(review): the norm is not checked against zero before the division, so
 * an all-zero input is divided by 0 as-is.
 */
static void vec_normalize(sqlite3_context *context, int argc,
                          sqlite3_value **argv) {
  assert(argc == 1);

  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  char *err;
  enum VectorElementType elementType;

  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &cleanup, &err);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, err, -1);
    sqlite3_free(err);
    return;
  }

  if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    sqlite3_result_error(
        context, "only float32 vectors are supported when normalizing", -1);
    goto finish;
  }

  int outSize = dimensions * sizeof(f32);
  f32 *out = sqlite3_malloc(outSize);
  if (!out) {
    sqlite3_result_error_code(context, SQLITE_NOMEM);
    goto finish;
  }
  memset(out, 0, outSize);

  f32 *input = (f32 *)vector;

  // Accumulate the sum of squares, then take its square root.
  f32 norm = 0;
  for (size_t k = 0; k < dimensions; k++) {
    norm += input[k] * input[k];
  }
  norm = sqrt(norm);

  for (size_t k = 0; k < dimensions; k++) {
    out[k] = input[k] / norm;
  }

  // SQLite takes ownership of `out` and releases it with sqlite3_free.
  sqlite3_result_blob(context, out, outSize, sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);

finish:
  cleanup(vector);
}
/**
 * SQL scalar function that ignores its arguments and returns the
 * NUL-terminated string registered as the function's user data. The string is
 * handed to SQLite as SQLITE_STATIC, i.e. it is not copied or freed.
 */
static void _static_text_func(sqlite3_context *context, int argc,
                              sqlite3_value **argv) {
  UNUSED_PARAMETER(argc);
  UNUSED_PARAMETER(argv);

  const char *text = sqlite3_user_data(context);
  sqlite3_result_text(context, text, -1, SQLITE_STATIC);
}
2024-06-24 23:26:11 -07:00
# pragma endregion
2024-04-20 13:38:58 -07:00
/** Kinds of tokens produced by vec0_token_next(). */
enum Vec0TokenType {
  TOKEN_TYPE_IDENTIFIER = 0, // letter followed by letters, digits or '_'
  TOKEN_TYPE_DIGIT = 1,      // run of decimal digits
  TOKEN_TYPE_LBRACKET = 2,   // '['
  TOKEN_TYPE_RBRACKET = 3,   // ']'
  TOKEN_TYPE_PLUS = 4,       // '+'
  TOKEN_TYPE_EQ = 5,         // '='
  TOKEN_TYPE_LPAREN = 6,     // '('
  TOKEN_TYPE_RPAREN = 7,     // ')'
  TOKEN_TYPE_COMMA = 8,      // ','
};
/**
 * A single token produced by vec0_token_next(). `start` and `end` point into
 * the scanned source buffer; nothing is copied.
 */
struct Vec0Token {
  // What kind of token this is.
  enum Vec0TokenType token_type;
  // Identifier/digit tokens: first character of the token text.
  // Single-character tokens: equal to `end`.
  char *start;
  // One past the last character consumed for this token.
  char *end;
};
/** Returns 1 if `x` is an ASCII letter (a-z or A-Z), otherwise 0. */
int is_alpha(char x) {
  if (x >= 'a' && x <= 'z') {
    return 1;
  }
  return x >= 'A' && x <= 'Z';
}
/** Returns 1 if `x` is an ASCII decimal digit (0-9), otherwise 0. */
int is_digit(char x) {
  return !(x < '0' || x > '9');
}
/** Returns 1 if `x` is a space, tab, newline or carriage return, else 0. */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
# define VEC0_TOKEN_RESULT_EOF 1
# define VEC0_TOKEN_RESULT_SOME 2
# define VEC0_TOKEN_RESULT_ERROR 3
int vec0_token_next ( char * start , char * end , struct Vec0Token * out ) {
char * ptr = start ;
while ( ptr < end ) {
char curr = * ptr ;
if ( is_whitespace ( curr ) ) {
ptr + + ;
continue ;
2024-11-20 00:30:23 -08:00
} else if ( curr = = ' + ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_PLUS ;
return VEC0_TOKEN_RESULT_SOME ;
2024-04-20 13:38:58 -07:00
} else if ( curr = = ' [ ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_LBRACKET ;
return VEC0_TOKEN_RESULT_SOME ;
} else if ( curr = = ' ] ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_RBRACKET ;
return VEC0_TOKEN_RESULT_SOME ;
} else if ( curr = = ' = ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_EQ ;
return VEC0_TOKEN_RESULT_SOME ;
2026-03-02 18:07:57 -08:00
} else if ( curr = = ' ( ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_LPAREN ;
return VEC0_TOKEN_RESULT_SOME ;
} else if ( curr = = ' ) ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_RPAREN ;
return VEC0_TOKEN_RESULT_SOME ;
} else if ( curr = = ' , ' ) {
ptr + + ;
out - > start = ptr ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_COMMA ;
return VEC0_TOKEN_RESULT_SOME ;
2024-04-20 13:38:58 -07:00
} else if ( is_alpha ( curr ) ) {
char * start = ptr ;
while ( ptr < end & & ( is_alpha ( * ptr ) | | is_digit ( * ptr ) | | * ptr = = ' _ ' ) ) {
ptr + + ;
}
out - > start = start ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_IDENTIFIER ;
return VEC0_TOKEN_RESULT_SOME ;
} else if ( is_digit ( curr ) ) {
char * start = ptr ;
while ( ptr < end & & ( is_digit ( * ptr ) ) ) {
ptr + + ;
}
out - > start = start ;
out - > end = ptr ;
out - > token_type = TOKEN_TYPE_DIGIT ;
return VEC0_TOKEN_RESULT_SOME ;
} else {
return VEC0_TOKEN_RESULT_ERROR ;
}
}
return VEC0_TOKEN_RESULT_EOF ;
}
/** Cursor state for tokenising one source string with vec0_scanner_next(). */
struct Vec0Scanner {
  // Where the next token scan begins; advanced by vec0_scanner_next().
  char *start;
  // One past the last byte of the source.
  char *end;
  // Set to the beginning of the source by vec0_scanner_init().
  char *ptr;
};
void vec0_scanner_init ( struct Vec0Scanner * scanner , const char * source ,
int source_length ) {
scanner - > start = ( char * ) source ;
scanner - > end = ( char * ) source + source_length ;
scanner - > ptr = ( char * ) source ;
}
int vec0_scanner_next ( struct Vec0Scanner * scanner , struct Vec0Token * out ) {
int rc = vec0_token_next ( scanner - > start , scanner - > end , out ) ;
if ( rc = = VEC0_TOKEN_RESULT_SOME ) {
scanner - > start = out - > end ;
}
return rc ;
}
int vec0_parse_table_option ( const char * source , int source_length ,
char * * out_key , int * out_key_length ,
char * * out_value , int * out_value_length ) {
int rc ;
struct Vec0Scanner scanner ;
struct Vec0Token token ;
char * key ;
char * value ;
int keyLength , valueLength ;
vec0_scanner_init ( & scanner , source , source_length ) ;
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
key = token . start ;
keyLength = token . end - token . start ;
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & & token . token_type ! = TOKEN_TYPE_EQ ) {
return SQLITE_EMPTY ;
}
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
! ( ( token . token_type = = TOKEN_TYPE_IDENTIFIER ) | |
( token . token_type = = TOKEN_TYPE_DIGIT ) ) ) {
2024-06-24 23:26:11 -07:00
return SQLITE_ERROR ;
2024-04-20 13:38:58 -07:00
}
value = token . start ;
valueLength = token . end - token . start ;
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc = = VEC0_TOKEN_RESULT_EOF ) {
* out_key = key ;
* out_key_length = keyLength ;
* out_value = value ;
* out_value_length = valueLength ;
return SQLITE_OK ;
}
return SQLITE_ERROR ;
}
2024-11-20 00:02:04 -08:00
/**
* @ brief Parse an argv [ i ] entry of a vec0 virtual table definition , and see if
* it ' s a PARTITION KEY definition .
*
* @ param source : argv [ i ] source string
* @ param source_length : length of the source string
* @ param out_column_name : If it is a partition key , the output column name . Same lifetime
* as source , points to specific char *
* @ param out_column_name_length : Length of out_column_name in bytes
* @ param out_column_type : SQLITE_TEXT or SQLITE_INTEGER .
* @ return int : SQLITE_EMPTY if not a PK , SQLITE_OK if it is .
*/
int vec0_parse_partition_key_definition ( const char * source , int source_length ,
char * * out_column_name ,
int * out_column_name_length ,
int * out_column_type ) {
struct Vec0Scanner scanner ;
struct Vec0Token token ;
char * column_name ;
int column_name_length ;
int column_type ;
vec0_scanner_init ( & scanner , source , source_length ) ;
// Check first token is identifier, will be the column name
int rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
column_name = token . start ;
column_name_length = token . end - token . start ;
// Check the next token matches "text" or "integer", as column type
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " text " , token . end - token . start ) = = 0 ) {
column_type = SQLITE_TEXT ;
} else if ( sqlite3_strnicmp ( token . start , " int " , token . end - token . start ) = =
0 | |
sqlite3_strnicmp ( token . start , " integer " ,
token . end - token . start ) = = 0 ) {
column_type = SQLITE_INTEGER ;
} else {
return SQLITE_EMPTY ;
}
// Check the next token is identifier and matches "partition"
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " partition " , token . end - token . start ) ! = 0 ) {
return SQLITE_EMPTY ;
}
// Check the next token is identifier and matches "key"
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " key " , token . end - token . start ) ! = 0 ) {
return SQLITE_EMPTY ;
}
* out_column_name = column_name ;
* out_column_name_length = column_name_length ;
* out_column_type = column_type ;
return SQLITE_OK ;
}
2024-11-20 00:30:23 -08:00
/**
* @ brief Parse an argv [ i ] entry of a vec0 virtual table definition , and see if
* it ' s an auxiliar column definition , ie ` + [ name ] [ type ] ` like ` + contents text `
*
* @ param source : argv [ i ] source string
* @ param source_length : length of the source string
* @ param out_column_name : If it is a partition key , the output column name . Same lifetime
* as source , points to specific char *
* @ param out_column_name_length : Length of out_column_name in bytes
* @ param out_column_type : SQLITE_TEXT , SQLITE_INTEGER , SQLITE_FLOAT , or SQLITE_BLOB .
* @ return int : SQLITE_EMPTY if not an aux column , SQLITE_OK if it is .
*/
int vec0_parse_auxiliary_column_definition ( const char * source , int source_length ,
char * * out_column_name ,
int * out_column_name_length ,
int * out_column_type ) {
struct Vec0Scanner scanner ;
struct Vec0Token token ;
char * column_name ;
int column_name_length ;
int column_type ;
vec0_scanner_init ( & scanner , source , source_length ) ;
// Check first token is '+', which denotes aux columns
int rc = vec0_scanner_next ( & scanner , & token ) ;
2024-11-20 00:59:34 -08:00
if ( rc ! = VEC0_TOKEN_RESULT_SOME | |
2024-11-20 00:30:23 -08:00
token . token_type ! = TOKEN_TYPE_PLUS ) {
return SQLITE_EMPTY ;
}
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
column_name = token . start ;
column_name_length = token . end - token . start ;
// Check the next token matches "text" or "integer", as column type
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " text " , token . end - token . start ) = = 0 ) {
column_type = SQLITE_TEXT ;
} else if ( sqlite3_strnicmp ( token . start , " int " , token . end - token . start ) = =
0 | |
sqlite3_strnicmp ( token . start , " integer " ,
token . end - token . start ) = = 0 ) {
column_type = SQLITE_INTEGER ;
} else if ( sqlite3_strnicmp ( token . start , " float " , token . end - token . start ) = =
0 | |
sqlite3_strnicmp ( token . start , " double " ,
token . end - token . start ) = = 0 ) {
column_type = SQLITE_FLOAT ;
} else if ( sqlite3_strnicmp ( token . start , " blob " , token . end - token . start ) = = 0 ) {
column_type = SQLITE_BLOB ;
} else {
return SQLITE_EMPTY ;
}
* out_column_name = column_name ;
* out_column_name_length = column_name_length ;
* out_column_type = column_type ;
return SQLITE_OK ;
}
2024-11-20 00:59:34 -08:00
/** Value kinds a vec0 metadata column can be declared with. */
typedef enum {
  VEC0_METADATA_COLUMN_KIND_BOOLEAN = 0,
  VEC0_METADATA_COLUMN_KIND_INTEGER = 1,
  VEC0_METADATA_COLUMN_KIND_FLOAT = 2,
  VEC0_METADATA_COLUMN_KIND_TEXT = 3,
  // future: blob, date, datetime
} vec0_metadata_column_kind;
/**
 * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
 * it's a metadata column definition, ie `[name] [type]` like
 * `is_released boolean`.
 *
 * Recognised type names: boolean/bool, int64/integer64/integer/int,
 * float/double/float64/f64, and text. Names are compared case-insensitively
 * over the length of the type token only.
 *
 * @param source: argv[i] source string
 * @param source_length: length of the source string
 * @param out_column_name: If it is a metadata column, the output column name.
 * Same lifetime as source, points to specific char *
 * @param out_column_name_length: Length of out_column_name in bytes
 * @param out_column_type: one of vec0_metadata_column_kind
 * @return int: SQLITE_EMPTY if not a metadata column, SQLITE_OK if it is.
 */
int vec0_parse_metadata_column_definition(
    const char *source, int source_length, char **out_column_name,
    int *out_column_name_length, vec0_metadata_column_kind *out_column_type) {
  // Type names in the order they are tried.
  static const struct {
    const char *name;
    vec0_metadata_column_kind kind;
  } typeNames[] = {
      {"boolean", VEC0_METADATA_COLUMN_KIND_BOOLEAN},
      {"bool", VEC0_METADATA_COLUMN_KIND_BOOLEAN},
      {"int64", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"integer64", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"integer", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"int", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"float", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"double", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"float64", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"f64", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"text", VEC0_METADATA_COLUMN_KIND_TEXT},
  };

  struct Vec0Scanner scanner;
  struct Vec0Token token;

  vec0_scanner_init(&scanner, source, source_length);

  // Column name.
  if (vec0_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  char *column_name = token.start;
  int column_name_length = token.end - token.start;

  // Column type.
  if (vec0_scanner_next(&scanner, &token) != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  char *typeText = token.start;
  int typeLength = token.end - token.start;

  for (size_t i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); i++) {
    if (sqlite3_strnicmp(typeText, typeNames[i].name, typeLength) == 0) {
      *out_column_name = column_name;
      *out_column_name_length = column_name_length;
      *out_column_type = typeNames[i].kind;
      return SQLITE_OK;
    }
  }
  return SQLITE_EMPTY;
}
2024-04-20 13:38:58 -07:00
/**
* @ brief Parse an argv [ i ] entry of a vec0 virtual table definition , and see if
* it ' s a PRIMARY KEY definition .
*
* @ param source : argv [ i ] source string
* @ param source_length : length of the source string
* @ param out_column_name : If it is a PK , the output column name . Same lifetime
* as source , points to specific char *
* @ param out_column_name_length : Length of out_column_name in bytes
* @ param out_column_type : SQLITE_TEXT or SQLITE_INTEGER .
* @ return int : SQLITE_EMPTY if not a PK , SQLITE_OK if it is .
*/
2024-11-20 00:02:04 -08:00
int vec0_parse_primary_key_definition ( const char * source , int source_length ,
2024-04-20 13:38:58 -07:00
char * * out_column_name ,
int * out_column_name_length ,
int * out_column_type ) {
struct Vec0Scanner scanner ;
struct Vec0Token token ;
char * column_name ;
int column_name_length ;
int column_type ;
vec0_scanner_init ( & scanner , source , source_length ) ;
// Check first token is identifier, will be the column name
int rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
column_name = token . start ;
column_name_length = token . end - token . start ;
// Check the next token matches "text" or "integer", as column type
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " text " , token . end - token . start ) = = 0 ) {
column_type = SQLITE_TEXT ;
} else if ( sqlite3_strnicmp ( token . start , " int " , token . end - token . start ) = =
0 | |
sqlite3_strnicmp ( token . start , " integer " ,
token . end - token . start ) = = 0 ) {
column_type = SQLITE_INTEGER ;
} else {
return SQLITE_EMPTY ;
}
// Check the next token is identifier and matches "primary"
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " primary " , token . end - token . start ) ! = 0 ) {
return SQLITE_EMPTY ;
}
// Check the next token is identifier and matches "key"
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_EMPTY ;
}
if ( sqlite3_strnicmp ( token . start , " key " , token . end - token . start ) ! = 0 ) {
return SQLITE_EMPTY ;
}
* out_column_name = column_name ;
* out_column_name_length = column_name_length ;
* out_column_type = column_type ;
return SQLITE_OK ;
}
/** Distance metrics a vec0 vector column can be configured with. */
enum Vec0DistanceMetrics {
  VEC0_DISTANCE_METRIC_L2 = 1,
  VEC0_DISTANCE_METRIC_COSINE = 2,
  VEC0_DISTANCE_METRIC_L1 = 3,
};
2026-03-29 19:44:44 -07:00
/**
 * Compute the distance between two full-precision vectors, dispatching to the
 * distance function that matches the element type and metric.
 * Shared utility used by ANN index implementations.
 *
 * Bit vectors always use the hamming distance, whatever `metric` is. An
 * unrecognised element type or metric yields 0.0f.
 */
static f32 vec0_distance_full(
    const void *a, const void *b, size_t dimensions,
    enum VectorElementType elementType,
    enum Vec0DistanceMetrics metric) {
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    return distance_hamming(a, b, &dimensions);
  }

  if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    if (metric == VEC0_DISTANCE_METRIC_L2) {
      return distance_l2_sqr_float(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_COSINE) {
      return distance_cosine_float(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_L1) {
      return (f32)distance_l1_f32(a, b, &dimensions);
    }
    return 0.0f;
  }

  if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    if (metric == VEC0_DISTANCE_METRIC_L2) {
      return distance_l2_sqr_int8(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_COSINE) {
      return distance_cosine_int8(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_L1) {
      return (f32)distance_l1_int8(a, b, &dimensions);
    }
    return 0.0f;
  }

  return 0.0f;
}
/**
 * Index strategy attached to a vector column. Every value is assigned
 * explicitly, so the numbering does not shift when the rescore entry is
 * compiled out.
 */
enum Vec0IndexType {
  VEC0_INDEX_TYPE_FLAT = 1,
#if SQLITE_VEC_ENABLE_RESCORE
  VEC0_INDEX_TYPE_RESCORE = 2,
#endif
  VEC0_INDEX_TYPE_IVF = 3,
  VEC0_INDEX_TYPE_DISKANN = 4,
};
2026-03-29 19:45:54 -07:00
#if SQLITE_VEC_ENABLE_RESCORE
/** Quantizer kinds that a rescore index can be configured with. */
enum Vec0RescoreQuantizerType {
  VEC0_RESCORE_QUANTIZER_BIT = 1,
  VEC0_RESCORE_QUANTIZER_INT8 = 2,
};

/** Options of a rescore index on a single vector column. */
struct Vec0RescoreConfig {
  // Which quantizer the index uses.
  enum Vec0RescoreQuantizerType quantizer_type;
  // Value of the `oversample` option.
  int oversample;
};
#endif
2026-03-30 23:17:30 -07:00
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
/** Quantizer choices for an IVF index (see Vec0IvfConfig.quantizer). */
enum Vec0IvfQuantizer {
  VEC0_IVF_QUANTIZER_NONE = 0,
  VEC0_IVF_QUANTIZER_INT8 = 1,
  VEC0_IVF_QUANTIZER_BINARY = 2,
};

/** Options of an IVF index on a single vector column. */
struct Vec0IvfConfig {
  int nlist;      // number of centroids (0 = deferred)
  int nprobe;     // cells to probe at query time
  int quantizer;  // VEC0_IVF_QUANTIZER_NONE / INT8 / BINARY
  int oversample; // >= 1 (1 = no oversampling)
};
#else
// Placeholder so the type still exists when IVF support is compiled out.
struct Vec0IvfConfig {
  char _unused;
};
#endif
2026-03-31 01:12:50 -07:00
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
// ============================================================
// DiskANN types and constants
// ============================================================

#define VEC0_DISKANN_DEFAULT_N_NEIGHBORS 72
#define VEC0_DISKANN_MAX_N_NEIGHBORS 256
#define VEC0_DISKANN_DEFAULT_SEARCH_LIST_SIZE 128
#define VEC0_DISKANN_DEFAULT_ALPHA 1.2f

/**
 * How neighbor vectors stored in the DiskANN graph are compressed.
 */
enum Vec0DiskannQuantizerType {
  // 1 bit per dimension (1/32 compression)
  VEC0_DISKANN_QUANTIZER_BINARY = 1,
  // 1 byte per dimension (1/4 compression)
  VEC0_DISKANN_QUANTIZER_INT8 = 2,
};
/**
 * DiskANN index settings for one vector column.
 * Parsed from `INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=72)`.
 */
struct Vec0DiskannConfig {
  // How neighbor vectors are quantized.
  enum Vec0DiskannQuantizerType quantizer_type;

  // Maximum number of neighbors per node (R in the paper). Must be divisible
  // by 8.
  int n_neighbors;

  // Search list size (L in the paper): the unified default for both insert
  // and query.
  int search_list_size;

  // Per-path overrides (0 = fall back to search_list_size).
  int search_list_size_search;
  int search_list_size_insert;

  // Alpha parameter for RobustPrune (distance scaling factor, typically
  // 1.0-1.5).
  f32 alpha;

  // Buffer threshold for batched inserts. When > 0, inserts go into a flat
  // buffer table and are flushed into the graph when the buffer reaches this
  // size. 0 = disabled (legacy per-row insert behavior).
  int buffer_threshold;
};
/**
 * One candidate node tracked during greedy beam search.
 * Used in priority queues / sorted arrays during LM-Search.
 */
struct Vec0DiskannCandidate {
  i64 rowid;
  f32 distance;
  // 1 if this candidate's neighbors have been explored
  int visited;
  // 1 if the full-precision vector was successfully read (node exists)
  int confirmed;
};
/**
 * Byte size of one quantized vector with `dimensions` dimensions under the
 * given quantizer: dimensions / CHAR_BIT for binary, one byte per dimension
 * for int8. Returns 0 for an unrecognised quantizer type.
 */
size_t diskann_quantized_vector_byte_size(
    enum Vec0DiskannQuantizerType quantizer_type, size_t dimensions) {
  if (quantizer_type == VEC0_DISKANN_QUANTIZER_BINARY) {
    // 1 bit per dimension
    return dimensions / CHAR_BIT;
  }
  if (quantizer_type == VEC0_DISKANN_QUANTIZER_INT8) {
    // 1 byte per dimension
    return dimensions * sizeof(i8);
  }
  return 0;
}
2024-04-20 13:38:58 -07:00
struct VectorColumnDefinition {
char * name ;
int name_length ;
size_t dimensions ;
enum VectorElementType element_type ;
enum Vec0DistanceMetrics distance_metric ;
2026-03-29 19:44:44 -07:00
enum Vec0IndexType index_type ;
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
struct Vec0RescoreConfig rescore ;
# endif
2026-03-29 19:46:23 -07:00
struct Vec0IvfConfig ivf ;
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
struct Vec0DiskannConfig diskann ;
2024-04-20 13:38:58 -07:00
} ;
2024-11-20 00:02:04 -08:00
/** Definition of one partition key column of a vec0 virtual table. */
struct Vec0PartitionColumnDefinition {
  // Column type code.
  int type;
  // Column name and its length in bytes.
  char *name;
  int name_length;
};
2024-11-20 00:30:23 -08:00
/** Definition of one auxiliary (`+name type`) column of a vec0 table. */
struct Vec0AuxiliaryColumnDefinition {
  // Column type code.
  int type;
  // Column name and its length in bytes.
  char *name;
  int name_length;
};
2024-11-20 00:59:34 -08:00
/** Definition of one metadata column of a vec0 virtual table. */
struct Vec0MetadataColumnDefinition {
  // Value kind the column was declared with.
  vec0_metadata_column_kind kind;
  // Column name and its length in bytes.
  char *name;
  int name_length;
};
2024-11-20 00:30:23 -08:00
2024-06-28 10:51:49 -07:00
size_t vector_byte_size ( enum VectorElementType element_type ,
size_t dimensions ) {
switch ( element_type ) {
2024-04-20 13:38:58 -07:00
case SQLITE_VEC_ELEMENT_TYPE_FLOAT32 :
2024-06-28 10:51:49 -07:00
return dimensions * sizeof ( f32 ) ;
2024-04-20 13:38:58 -07:00
case SQLITE_VEC_ELEMENT_TYPE_INT8 :
2024-06-28 10:51:49 -07:00
return dimensions * sizeof ( i8 ) ;
2024-04-20 13:38:58 -07:00
case SQLITE_VEC_ELEMENT_TYPE_BIT :
2024-06-28 10:51:49 -07:00
return dimensions / CHAR_BIT ;
2024-04-20 13:38:58 -07:00
}
2024-08-09 10:25:31 -07:00
return 0 ;
2024-04-20 13:38:58 -07:00
}
2024-06-28 16:00:58 -07:00
size_t vector_column_byte_size ( struct VectorColumnDefinition column ) {
2024-06-28 10:51:49 -07:00
return vector_byte_size ( column . element_type , column . dimensions ) ;
}
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
/**
* @ brief Parse rescore options from an " INDEXED BY rescore(...) " clause .
*
* @ param scanner Scanner positioned right after the opening ' ( ' of rescore ( . . . )
* @ param outConfig Output rescore config
* @ param pzErr Error message output
* @ return int SQLITE_OK on success , SQLITE_ERROR on error .
*/
static int vec0_parse_rescore_options ( struct Vec0Scanner * scanner ,
struct Vec0RescoreConfig * outConfig ,
char * * pzErr ) {
struct Vec0Token token ;
int rc ;
int hasQuantizer = 0 ;
outConfig - > oversample = 8 ;
outConfig - > quantizer_type = 0 ;
while ( 1 ) {
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc = = VEC0_TOKEN_RESULT_EOF ) {
break ;
}
// ')' closes rescore options
if ( rc = = VEC0_TOKEN_RESULT_SOME & & token . token_type = = TOKEN_TYPE_RPAREN ) {
break ;
}
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
* pzErr = sqlite3_mprintf ( " Expected option name in rescore(...) " ) ;
return SQLITE_ERROR ;
}
char * key = token . start ;
int keyLength = token . end - token . start ;
// expect '='
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_EQ ) {
* pzErr = sqlite3_mprintf ( " Expected '=' after option name in rescore(...) " ) ;
return SQLITE_ERROR ;
}
// value
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME ) {
* pzErr = sqlite3_mprintf ( " Expected value after '=' in rescore(...) " ) ;
return SQLITE_ERROR ;
}
if ( sqlite3_strnicmp ( key , " quantizer " , keyLength ) = = 0 ) {
if ( token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
* pzErr = sqlite3_mprintf ( " Expected identifier for quantizer value in rescore(...) " ) ;
return SQLITE_ERROR ;
}
int valLen = token . end - token . start ;
if ( sqlite3_strnicmp ( token . start , " bit " , valLen ) = = 0 ) {
outConfig - > quantizer_type = VEC0_RESCORE_QUANTIZER_BIT ;
} else if ( sqlite3_strnicmp ( token . start , " int8 " , valLen ) = = 0 ) {
outConfig - > quantizer_type = VEC0_RESCORE_QUANTIZER_INT8 ;
} else {
* pzErr = sqlite3_mprintf ( " Unknown quantizer type '%.*s' in rescore(...). Expected 'bit' or 'int8'. " , valLen , token . start ) ;
return SQLITE_ERROR ;
}
hasQuantizer = 1 ;
} else if ( sqlite3_strnicmp ( key , " oversample " , keyLength ) = = 0 ) {
if ( token . token_type ! = TOKEN_TYPE_DIGIT ) {
* pzErr = sqlite3_mprintf ( " Expected integer for oversample value in rescore(...) " ) ;
return SQLITE_ERROR ;
}
outConfig - > oversample = atoi ( token . start ) ;
if ( outConfig - > oversample < = 0 | | outConfig - > oversample > 128 ) {
* pzErr = sqlite3_mprintf ( " oversample in rescore(...) must be between 1 and 128, got %d " , outConfig - > oversample ) ;
return SQLITE_ERROR ;
}
} else {
* pzErr = sqlite3_mprintf ( " Unknown option '%.*s' in rescore(...) " , keyLength , key ) ;
return SQLITE_ERROR ;
}
// optional comma between options
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc = = VEC0_TOKEN_RESULT_EOF ) {
break ;
}
if ( rc = = VEC0_TOKEN_RESULT_SOME & & token . token_type = = TOKEN_TYPE_RPAREN ) {
break ;
}
if ( rc = = VEC0_TOKEN_RESULT_SOME & & token . token_type = = TOKEN_TYPE_COMMA ) {
continue ;
}
// If it's not a comma or rparen, it might be the next key — push back isn't
// possible with this scanner, so we'll treat unexpected tokens as errors
* pzErr = sqlite3_mprintf ( " Unexpected token in rescore(...) options " ) ;
return SQLITE_ERROR ;
}
if ( ! hasQuantizer ) {
* pzErr = sqlite3_mprintf ( " rescore(...) requires a 'quantizer' option (quantizer=bit or quantizer=int8) " ) ;
return SQLITE_ERROR ;
}
return SQLITE_OK ;
}
# endif /* SQLITE_VEC_ENABLE_RESCORE */
2024-06-24 23:26:11 -07:00
/**
 * @brief Parse a vec0 vtab argv[i] column definition and see if
 * it's a vector column definition, ex `contents_embedding float[768]`.
 *
 * @param source vec0 argv[i] item
 * @param source_length length of source in bytes
 * @param outColumn Output the parsed vector column to this struct, on success
 * @return int SQLITE_OK on success, SQLITE_EMPTY if it's not a vector column
 * definition, SQLITE_ERROR on error.
 */
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// Forward declaration — defined in sqlite-vec-ivf.c
static int vec0_parse_ivf_options ( struct Vec0Scanner * scanner ,
struct Vec0IvfConfig * config ) ;
# endif
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
/**
* Parse the options inside diskann ( . . . ) parentheses .
* Scanner should be positioned right before the ' ( ' token .
*
* Recognized options :
* neighbor_quantizer = binary | int8 ( required )
* n_neighbors = < integer > ( optional , default 72 )
* search_list_size = < integer > ( optional , default 128 )
*/
static int vec0_parse_diskann_options ( struct Vec0Scanner * scanner ,
struct Vec0DiskannConfig * config ) {
int rc ;
struct Vec0Token token ;
int hasQuantizer = 0 ;
// Set defaults
config - > n_neighbors = VEC0_DISKANN_DEFAULT_N_NEIGHBORS ;
config - > search_list_size = VEC0_DISKANN_DEFAULT_SEARCH_LIST_SIZE ;
config - > search_list_size_search = 0 ;
config - > search_list_size_insert = 0 ;
config - > alpha = VEC0_DISKANN_DEFAULT_ALPHA ;
config - > buffer_threshold = 0 ;
int hasSearchListSize = 0 ;
int hasSearchListSizeSplit = 0 ;
// Expect '('
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_LPAREN ) {
return SQLITE_ERROR ;
}
while ( 1 ) {
// key
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc = = VEC0_TOKEN_RESULT_SOME & & token . token_type = = TOKEN_TYPE_RPAREN ) {
break ; // empty parens or trailing comma
}
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_ERROR ;
}
char * optKey = token . start ;
int optKeyLen = token . end - token . start ;
// '='
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_EQ ) {
return SQLITE_ERROR ;
}
// value (identifier or digit)
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME ) {
return SQLITE_ERROR ;
}
char * optVal = token . start ;
int optValLen = token . end - token . start ;
if ( sqlite3_strnicmp ( optKey , " neighbor_quantizer " , optKeyLen ) = = 0 ) {
if ( sqlite3_strnicmp ( optVal , " binary " , optValLen ) = = 0 ) {
config - > quantizer_type = VEC0_DISKANN_QUANTIZER_BINARY ;
} else if ( sqlite3_strnicmp ( optVal , " int8 " , optValLen ) = = 0 ) {
config - > quantizer_type = VEC0_DISKANN_QUANTIZER_INT8 ;
} else {
return SQLITE_ERROR ; // unknown quantizer
}
hasQuantizer = 1 ;
} else if ( sqlite3_strnicmp ( optKey , " n_neighbors " , optKeyLen ) = = 0 ) {
config - > n_neighbors = atoi ( optVal ) ;
if ( config - > n_neighbors < = 0 | | ( config - > n_neighbors % 8 ) ! = 0 | |
config - > n_neighbors > VEC0_DISKANN_MAX_N_NEIGHBORS ) {
return SQLITE_ERROR ;
}
} else if ( sqlite3_strnicmp ( optKey , " search_list_size_search " , optKeyLen ) = = 0 & & optKeyLen = = 23 ) {
config - > search_list_size_search = atoi ( optVal ) ;
if ( config - > search_list_size_search < = 0 ) {
return SQLITE_ERROR ;
}
hasSearchListSizeSplit = 1 ;
} else if ( sqlite3_strnicmp ( optKey , " search_list_size_insert " , optKeyLen ) = = 0 & & optKeyLen = = 23 ) {
config - > search_list_size_insert = atoi ( optVal ) ;
if ( config - > search_list_size_insert < = 0 ) {
return SQLITE_ERROR ;
}
hasSearchListSizeSplit = 1 ;
} else if ( sqlite3_strnicmp ( optKey , " search_list_size " , optKeyLen ) = = 0 ) {
config - > search_list_size = atoi ( optVal ) ;
if ( config - > search_list_size < = 0 ) {
return SQLITE_ERROR ;
}
hasSearchListSize = 1 ;
} else if ( sqlite3_strnicmp ( optKey , " buffer_threshold " , optKeyLen ) = = 0 ) {
config - > buffer_threshold = atoi ( optVal ) ;
if ( config - > buffer_threshold < 0 ) {
return SQLITE_ERROR ;
}
} else {
return SQLITE_ERROR ; // unknown option
}
// Expect ',' or ')'
rc = vec0_scanner_next ( scanner , & token ) ;
if ( rc = = VEC0_TOKEN_RESULT_SOME & & token . token_type = = TOKEN_TYPE_RPAREN ) {
break ;
}
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_COMMA ) {
return SQLITE_ERROR ;
}
}
if ( ! hasQuantizer ) {
return SQLITE_ERROR ; // neighbor_quantizer is required
}
if ( hasSearchListSize & & hasSearchListSizeSplit ) {
return SQLITE_ERROR ; // cannot mix search_list_size with search_list_size_search/insert
}
return SQLITE_OK ;
}
2024-11-20 00:02:04 -08:00
int vec0_parse_vector_column ( const char * source , int source_length ,
2024-06-24 23:26:11 -07:00
struct VectorColumnDefinition * outColumn ) {
2024-04-20 13:38:58 -07:00
// parses a vector column definition like so:
// "abc float[123]", "abc_123 bit[1234]", eetc.
2024-07-31 12:56:09 -07:00
// https://github.com/asg017/sqlite-vec/issues/46
2024-06-24 23:26:11 -07:00
int rc ;
2024-04-20 13:38:58 -07:00
struct Vec0Scanner scanner ;
struct Vec0Token token ;
2024-06-24 23:26:11 -07:00
char * name ;
int nameLength ;
enum VectorElementType elementType ;
enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2 ;
2026-03-29 19:44:44 -07:00
enum Vec0IndexType indexType = VEC0_INDEX_TYPE_FLAT ;
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
struct Vec0RescoreConfig rescoreConfig ;
memset ( & rescoreConfig , 0 , sizeof ( rescoreConfig ) ) ;
# endif
2026-03-29 19:46:23 -07:00
struct Vec0IvfConfig ivfConfig ;
memset ( & ivfConfig , 0 , sizeof ( ivfConfig ) ) ;
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
struct Vec0DiskannConfig diskannConfig ;
memset ( & diskannConfig , 0 , sizeof ( diskannConfig ) ) ;
2024-06-24 23:26:11 -07:00
int dimensions ;
2024-04-20 13:38:58 -07:00
vec0_scanner_init ( & scanner , source , source_length ) ;
2024-06-24 23:26:11 -07:00
// starts with an identifier
rc = vec0_scanner_next ( & scanner , & token ) ;
2024-04-20 13:38:58 -07:00
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
2024-06-24 23:26:11 -07:00
return SQLITE_EMPTY ;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
name = token . start ;
nameLength = token . end - token . start ;
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
// vector column type comes next: float, int, or bit
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
2024-06-24 23:26:11 -07:00
if ( rc ! = VEC0_TOKEN_RESULT_SOME | |
2024-04-20 13:38:58 -07:00
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
2024-06-24 23:26:11 -07:00
return SQLITE_EMPTY ;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
if ( sqlite3_strnicmp ( token . start , " float " , 5 ) = = 0 | |
sqlite3_strnicmp ( token . start , " f32 " , 3 ) = = 0 ) {
elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32 ;
} else if ( sqlite3_strnicmp ( token . start , " int8 " , 4 ) = = 0 | |
sqlite3_strnicmp ( token . start , " i8 " , 2 ) = = 0 ) {
elementType = SQLITE_VEC_ELEMENT_TYPE_INT8 ;
} else if ( sqlite3_strnicmp ( token . start , " bit " , 3 ) = = 0 ) {
elementType = SQLITE_VEC_ELEMENT_TYPE_BIT ;
2024-04-20 13:38:58 -07:00
} else {
2024-06-24 23:26:11 -07:00
return SQLITE_EMPTY ;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
// left '[' bracket
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & & token . token_type ! = TOKEN_TYPE_LBRACKET ) {
2024-11-20 00:59:34 -08:00
return SQLITE_EMPTY ;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
// digit, for vector dimension length
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & & token . token_type ! = TOKEN_TYPE_DIGIT ) {
return SQLITE_ERROR ;
}
2024-06-24 23:26:11 -07:00
dimensions = atoi ( token . start ) ;
if ( dimensions < = 0 ) {
return SQLITE_ERROR ;
}
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
// // right ']' bracket
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & & token . token_type ! = TOKEN_TYPE_RBRACKET ) {
return SQLITE_ERROR ;
}
// any other tokens left should be column-level options , ex `key=value`
2024-06-24 23:26:11 -07:00
// ex `distance_metric=L2 distance_metric=cosine` should error
2024-04-20 13:38:58 -07:00
while ( 1 ) {
2024-08-06 03:15:02 +09:00
// should be EOF or identifier (option key)
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc = = VEC0_TOKEN_RESULT_EOF ) {
2024-06-24 23:26:11 -07:00
break ;
2024-04-20 13:38:58 -07:00
}
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_ERROR ;
}
char * key = token . start ;
int keyLength = token . end - token . start ;
if ( sqlite3_strnicmp ( key , " distance_metric " , keyLength ) = = 0 ) {
2024-06-24 23:26:11 -07:00
if ( elementType = = SQLITE_VEC_ELEMENT_TYPE_BIT ) {
2024-04-20 13:38:58 -07:00
return SQLITE_ERROR ;
}
2024-06-24 23:26:11 -07:00
// ensure equal sign after distance_metric
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & & token . token_type ! = TOKEN_TYPE_EQ ) {
return SQLITE_ERROR ;
}
2024-08-06 03:15:02 +09:00
// distance_metric value, an identifier (L2, cosine, etc)
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME & &
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_ERROR ;
}
char * value = token . start ;
int valueLength = token . end - token . start ;
if ( sqlite3_strnicmp ( value , " l2 " , valueLength ) = = 0 ) {
2024-06-24 23:26:11 -07:00
distanceMetric = VEC0_DISTANCE_METRIC_L2 ;
2024-07-23 23:57:42 -07:00
} else if ( sqlite3_strnicmp ( value , " l1 " , valueLength ) = = 0 ) {
2024-07-23 14:04:17 -07:00
distanceMetric = VEC0_DISTANCE_METRIC_L1 ;
2024-04-20 13:38:58 -07:00
} else if ( sqlite3_strnicmp ( value , " cosine " , valueLength ) = = 0 ) {
2024-06-24 23:26:11 -07:00
distanceMetric = VEC0_DISTANCE_METRIC_COSINE ;
2024-04-20 13:38:58 -07:00
} else {
return SQLITE_ERROR ;
}
}
2026-03-29 19:45:54 -07:00
// INDEXED BY flat() | rescore(...)
2026-03-29 19:44:44 -07:00
else if ( sqlite3_strnicmp ( key , " indexed " , keyLength ) = = 0 ) {
// expect "by"
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | |
token . token_type ! = TOKEN_TYPE_IDENTIFIER | |
sqlite3_strnicmp ( token . start , " by " , token . end - token . start ) ! = 0 ) {
return SQLITE_ERROR ;
}
// expect index type name
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | |
token . token_type ! = TOKEN_TYPE_IDENTIFIER ) {
return SQLITE_ERROR ;
}
int indexNameLen = token . end - token . start ;
if ( sqlite3_strnicmp ( token . start , " flat " , indexNameLen ) = = 0 ) {
indexType = VEC0_INDEX_TYPE_FLAT ;
// expect '('
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | |
token . token_type ! = TOKEN_TYPE_LPAREN ) {
return SQLITE_ERROR ;
}
// expect ')'
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | |
token . token_type ! = TOKEN_TYPE_RPAREN ) {
return SQLITE_ERROR ;
}
2026-03-29 19:45:54 -07:00
}
# if SQLITE_VEC_ENABLE_RESCORE
else if ( sqlite3_strnicmp ( token . start , " rescore " , indexNameLen ) = = 0 ) {
indexType = VEC0_INDEX_TYPE_RESCORE ;
if ( elementType ! = SQLITE_VEC_ELEMENT_TYPE_FLOAT32 ) {
return SQLITE_ERROR ;
}
// expect '('
rc = vec0_scanner_next ( & scanner , & token ) ;
if ( rc ! = VEC0_TOKEN_RESULT_SOME | | token . token_type ! = TOKEN_TYPE_LPAREN ) {
return SQLITE_ERROR ;
}
char * rescoreErr = NULL ;
rc = vec0_parse_rescore_options ( & scanner , & rescoreConfig , & rescoreErr ) ;
if ( rc ! = SQLITE_OK ) {
if ( rescoreErr ) sqlite3_free ( rescoreErr ) ;
return SQLITE_ERROR ;
}
// validate dimensions for bit quantizer
if ( rescoreConfig . quantizer_type = = VEC0_RESCORE_QUANTIZER_BIT & &
( dimensions % CHAR_BIT ) ! = 0 ) {
return SQLITE_ERROR ;
}
}
# endif
2026-03-29 19:46:23 -07:00
else if ( sqlite3_strnicmp ( token . start , " ivf " , indexNameLen ) = = 0 ) {
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
indexType = VEC0_INDEX_TYPE_IVF ;
memset ( & ivfConfig , 0 , sizeof ( ivfConfig ) ) ;
rc = vec0_parse_ivf_options ( & scanner , & ivfConfig ) ;
if ( rc ! = SQLITE_OK ) {
return SQLITE_ERROR ;
}
2026-03-31 14:51:27 -07:00
if ( ivfConfig . quantizer = = VEC0_IVF_QUANTIZER_BINARY & & ( dimensions % 8 ) ! = 0 ) {
return SQLITE_ERROR ;
}
2026-03-29 19:46:23 -07:00
# else
return SQLITE_ERROR ; // IVF not compiled in
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# endif
} else if ( sqlite3_strnicmp ( token . start , " diskann " , indexNameLen ) = = 0 ) {
# if SQLITE_VEC_ENABLE_DISKANN
indexType = VEC0_INDEX_TYPE_DISKANN ;
rc = vec0_parse_diskann_options ( & scanner , & diskannConfig ) ;
if ( rc ! = SQLITE_OK ) {
return rc ;
}
# else
return SQLITE_ERROR ;
2026-03-29 19:46:23 -07:00
# endif
} else {
2026-03-29 19:44:44 -07:00
// unknown index type
return SQLITE_ERROR ;
}
}
2024-06-24 23:26:11 -07:00
// unknown key
2024-04-20 13:38:58 -07:00
else {
return SQLITE_ERROR ;
}
}
2024-06-24 23:26:11 -07:00
outColumn - > name = sqlite3_mprintf ( " %.*s " , nameLength , name ) ;
if ( ! outColumn - > name ) {
return SQLITE_ERROR ;
}
outColumn - > name_length = nameLength ;
outColumn - > distance_metric = distanceMetric ;
outColumn - > element_type = elementType ;
outColumn - > dimensions = dimensions ;
2026-03-29 19:44:44 -07:00
outColumn - > index_type = indexType ;
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
outColumn - > rescore = rescoreConfig ;
# endif
2026-03-29 19:46:23 -07:00
outColumn - > ivf = ivfConfig ;
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
outColumn - > diskann = diskannConfig ;
2024-06-24 23:26:11 -07:00
return SQLITE_OK ;
2024-04-20 13:38:58 -07:00
}
# pragma region vec_each table function
// Virtual table object for the vec_each table function. It carries no state
// beyond the sqlite3_vtab base that SQLite requires as the first member.
typedef struct vec_each_vtab vec_each_vtab ;
struct vec_each_vtab {
sqlite3_vtab base ;
} ;
// Cursor for vec_each: iterates over the elements of one vector, one row per
// element.
typedef struct vec_each_cursor vec_each_cursor ;
struct vec_each_cursor {
sqlite3_vtab_cursor base ;
2024-04-20 17:02:19 -07:00
// Index of the current element; also reported as the row's rowid.
i64 iRowid ;
2024-04-20 13:38:58 -07:00
// Element type of `vector`; selects how xColumn decodes an element.
enum VectorElementType vector_type ;
// Vector being iterated, filled in by vector_from_value() in xFilter.
// NULL until the first successful xFilter.
void * vector ;
// Number of elements in `vector`; iteration ends when iRowid reaches it.
size_t dimensions ;
// Called with `vector` to release it before it is replaced or the cursor
// is closed.
vector_cleanup cleanup ;
} ;
/**
 * xConnect for vec_each: declares the table-function schema
 * (a visible `value` column and a hidden `vector` argument column) and
 * allocates a zeroed vtab object.
 *
 * @return the sqlite3_declare_vtab() result, or SQLITE_NOMEM when the vtab
 *         object cannot be allocated.
 */
static int vec_eachConnect(sqlite3 *db, void *pAux, int argc,
                           const char *const *argv, sqlite3_vtab **ppVtab,
                           char **pzErr) {
  UNUSED_PARAMETER(pAux);
  UNUSED_PARAMETER(argc);
  UNUSED_PARAMETER(argv);
  UNUSED_PARAMETER(pzErr);

  const int declared =
      sqlite3_declare_vtab(db, " CREATE TABLE x(value, vector hidden) ");
// column indexes of the schema declared above
#define VEC_EACH_COLUMN_VALUE 0
#define VEC_EACH_COLUMN_VECTOR 1
  if (declared != SQLITE_OK) {
    return declared;
  }

  vec_each_vtab *table = sqlite3_malloc(sizeof(*table));
  *ppVtab = (sqlite3_vtab *)table;
  if (table == 0) {
    return SQLITE_NOMEM;
  }
  memset(table, 0, sizeof(*table));
  return declared;
}
/** xDisconnect for vec_each: releases the vtab object allocated by xConnect. */
static int vec_eachDisconnect(sqlite3_vtab *pVtab) {
  vec_each_vtab *table = (vec_each_vtab *)pVtab;
  sqlite3_free(table);
  return SQLITE_OK;
}
/**
 * xOpen for vec_each: allocates a zero-initialized cursor, so `vector` starts
 * out NULL.
 *
 * @return SQLITE_OK, or SQLITE_NOMEM when the cursor cannot be allocated.
 */
static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  UNUSED_PARAMETER(p);

  vec_each_cursor *cursor = sqlite3_malloc(sizeof(*cursor));
  if (!cursor) {
    return SQLITE_NOMEM;
  }
  memset(cursor, 0, sizeof(*cursor));
  *ppCursor = &cursor->base;
  return SQLITE_OK;
}
/**
 * xClose for vec_each: releases the vector still held by the cursor, if any,
 * through its cleanup callback, then frees the cursor itself.
 */
static int vec_eachClose(sqlite3_vtab_cursor *cur) {
  vec_each_cursor *cursor = (vec_each_cursor *)cur;
  void *held = cursor->vector;
  if (held != NULL) {
    cursor->cleanup(held);
  }
  sqlite3_free(cursor);
  return SQLITE_OK;
}
/**
 * xBestIndex for vec_each: the hidden `vector` column must be supplied through
 * a usable equality constraint, which is then passed to xFilter as argv[0].
 *
 * @return SQLITE_OK with a fixed cost estimate, or SQLITE_CONSTRAINT when no
 *         usable `vector = ?` constraint is present.
 */
static int vec_eachBestIndex(sqlite3_vtab *pVTab,
                             sqlite3_index_info *pIdxInfo) {
  UNUSED_PARAMETER(pVTab);

  int sawVectorArg = 0;
  for (int idx = 0; idx < pIdxInfo->nConstraint; idx++) {
    const struct sqlite3_index_constraint *constraint =
        &pIdxInfo->aConstraint[idx];
    if (constraint->iColumn != VEC_EACH_COLUMN_VECTOR) {
      continue;
    }
    if (constraint->op != SQLITE_INDEX_CONSTRAINT_EQ || !constraint->usable) {
      continue;
    }
    sawVectorArg = 1;
    // hand the value to xFilter and let SQLite skip re-checking it
    pIdxInfo->aConstraintUsage[idx].argvIndex = 1;
    pIdxInfo->aConstraintUsage[idx].omit = 1;
  }

  if (!sawVectorArg) {
    return SQLITE_CONSTRAINT;
  }

  pIdxInfo->estimatedCost = (double)100000;
  pIdxInfo->estimatedRows = 100000;
  return SQLITE_OK;
}
/**
 * xFilter for vec_each: releases any vector left over from a previous scan,
 * decodes argv[0] into a new vector with vector_from_value(), and rewinds the
 * cursor to the first element.
 *
 * @return SQLITE_OK on success, SQLITE_ERROR when argv[0] cannot be read as a
 *         vector.
 */
static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
                          const char *idxStr, int argc, sqlite3_value **argv) {
  UNUSED_PARAMETER(idxNum);
  UNUSED_PARAMETER(idxStr);
  // argc is otherwise unused when assert() is compiled out
  UNUSED_PARAMETER(argc);
  assert(argc == 1);

  vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor;
  if (pCur->vector) {
    pCur->cleanup(pCur->vector);
    pCur->vector = NULL;
  }
  // With no vector held there are no elements to iterate, even if the
  // conversion below fails.
  pCur->dimensions = 0;
  pCur->iRowid = 0;

  // Must start as NULL: it is passed to sqlite3_free() on failure, and
  // nothing visible here guarantees vector_from_value() always writes it.
  char *pzErrMsg = NULL;
  int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions,
                             &pCur->vector_type, &pCur->cleanup, &pzErrMsg);
  if (rc != SQLITE_OK) {
    sqlite3_free(pzErrMsg);
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}
/** xRowid for vec_each: the rowid is the index of the current element. */
static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  const vec_each_cursor *cursor = (const vec_each_cursor *)cur;
  *pRowid = cursor->iRowid;
  return SQLITE_OK;
}
/** xEof for vec_each: true once every element of the vector has been visited. */
static int vec_eachEof(sqlite3_vtab_cursor *cur) {
  const vec_each_cursor *cursor = (const vec_each_cursor *)cur;
  const i64 elementCount = (i64)cursor->dimensions;
  return cursor->iRowid >= elementCount;
}
/** xNext for vec_each: advances the cursor to the next vector element. */
static int vec_eachNext(sqlite3_vtab_cursor *cur) {
  vec_each_cursor *cursor = (vec_each_cursor *)cur;
  cursor->iRowid += 1;
  return SQLITE_OK;
}
/**
 * xColumn for vec_each: reports the current element for the `value` column.
 *
 * float32 elements are returned as doubles, int8 elements as integers, and bit
 * elements as 0 or 1. Bits are read most-significant-bit first within each
 * byte. No result is set for any other column, including the hidden `vector`
 * column.
 */
static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context,
                          int i) {
  vec_each_cursor *pCur = (vec_each_cursor *)cur;
  switch (i) {
  case VEC_EACH_COLUMN_VALUE:
    switch (pCur->vector_type) {
    case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
      sqlite3_result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]);
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
      u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT];
      // 0x80 is the top bit of the byte; written in hex because binary
      // literals (0b...) are not part of standard C before C23.
      sqlite3_result_int(context,
                         (x & (0x80 >> ((pCur->iRowid % CHAR_BIT)))) > 0);
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8: {
      sqlite3_result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]);
      break;
    }
    }
    break;
  }
  return SQLITE_OK;
}
static sqlite3_module vec_eachModule = {
/* iVersion */ 0 ,
/* xCreate */ 0 ,
/* xConnect */ vec_eachConnect ,
/* xBestIndex */ vec_eachBestIndex ,
/* xDisconnect */ vec_eachDisconnect ,
/* xDestroy */ 0 ,
/* xOpen */ vec_eachOpen ,
/* xClose */ vec_eachClose ,
/* xFilter */ vec_eachFilter ,
/* xNext */ vec_eachNext ,
/* xEof */ vec_eachEof ,
/* xColumn */ vec_eachColumn ,
/* xRowid */ vec_eachRowid ,
/* xUpdate */ 0 ,
/* xBegin */ 0 ,
/* xSync */ 0 ,
/* xCommit */ 0 ,
/* xRollback */ 0 ,
/* xFindMethod */ 0 ,
/* xRename */ 0 ,
/* xSavepoint */ 0 ,
/* xRelease */ 0 ,
/* xRollbackTo */ 0 ,
/* xShadowName */ 0 ,
2024-06-28 10:51:49 -07:00
# if SQLITE_VERSION_NUMBER >= 3044000
2024-05-10 20:51:42 -07:00
/* xIntegrity */ 0
2024-06-13 16:32:57 -07:00
# endif
} ;
2024-04-20 13:38:58 -07:00
# pragma endregion
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
2026-03-29 19:44:44 -07:00
# pragma region vec0 virtual table
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_COLUMN_ID 0
# define VEC0_COLUMN_USERN_START 1
# define VEC0_COLUMN_OFFSET_DISTANCE 1
# define VEC0_COLUMN_OFFSET_K 2
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\""
/// 1) schema, 2) original vtab table name
# define VEC0_SHADOW_CHUNKS_CREATE \
" CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME " ( " \
" chunk_id INTEGER PRIMARY KEY AUTOINCREMENT, " \
" size INTEGER NOT NULL, " \
" validity BLOB NOT NULL, " \
" rowids BLOB NOT NULL " \
" ); "
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_SHADOW_ROWIDS_NAME "\"%w\".\"%w_rowids\""
/// 1) schema, 2) original vtab table name
# define VEC0_SHADOW_ROWIDS_CREATE_BASIC \
" CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME " ( " \
" rowid INTEGER PRIMARY KEY AUTOINCREMENT, " \
" id, " \
" chunk_id INTEGER, " \
" chunk_offset INTEGER " \
" ); "
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// vec0 tables with a text primary key are still backed by int64 primary keys,
// since a fixed-length rowid is required for vec0 chunks. But we add a new
// 'id text unique' column to emulate a text primary key interface.
# define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT \
" CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME " ( " \
" rowid INTEGER PRIMARY KEY AUTOINCREMENT, " \
" id TEXT UNIQUE NOT NULL, " \
" chunk_id INTEGER, " \
" chunk_offset INTEGER " \
" ); "
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/// 1) schema, 2) original vtab table name
# define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/// 1) schema, 2) original vtab table name
//
// IMPORTANT: "rowid" is declared as PRIMARY KEY but WITHOUT the INTEGER type.
// This means it is NOT a true SQLite rowid alias — the user-defined "rowid"
// column and the internal SQLite rowid (_rowid_) are two separate values.
// When inserting, both must be set explicitly to keep them in sync. See the
// _rowid_ bindings in vec0_new_chunk() and the explanation in
// SHADOW_TABLE_ROWID_QUIRK below.
# define VEC0_SHADOW_VECTOR_N_CREATE \
" CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME " ( " \
" rowid PRIMARY KEY, " \
" vectors BLOB NOT NULL " \
" ); "
2024-06-24 23:26:11 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\""
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# define VEC0_SHADOW_VECTORS_N_NAME "\"%w\".\"%w_vectors%02d\""
# define VEC0_SHADOW_DISKANN_NODES_N_NAME "\"%w\".\"%w_diskann_nodes%02d\""
# define VEC0_SHADOW_DISKANN_BUFFER_N_NAME "\"%w\".\"%w_diskann_buffer%02d\""
2026-03-29 19:44:44 -07:00
# define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\""
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
# define REPORT_URL "https: //github.com/asg017/sqlite-vec/issues/new"
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
typedef struct vec0_vtab vec0_vtab ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define VEC0_MAX_VECTOR_COLUMNS 16
# define VEC0_MAX_PARTITION_COLUMNS 4
# define VEC0_MAX_AUXILIARY_COLUMNS 16
# define VEC0_MAX_METADATA_COLUMNS 16
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192
# define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
# define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// Kind tag for each user-declared vec0 column; vec0_vtab stores one of these
// per column in user_column_kinds[].
typedef enum {
// vector column, ie "contents_embedding float[1024]"
SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1 ,
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// partition key column, ie "user_id integer partition key"
SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2 ,
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// auxiliary column
// NOTE(review): the declaration syntax for auxiliary columns is not visible
// in this part of the file — add an example once confirmed.
SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3 ,
// metadata column that can be filtered, ie "genre text"
SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4 ,
} vec0_user_column_kind ;
struct vec0_vtab {
2024-04-20 13:38:58 -07:00
sqlite3_vtab base ;
2026-03-29 19:44:44 -07:00
// the SQLite connection of the host database
sqlite3 * db ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// True if the primary key of the vec0 table has a column type TEXT.
// Will change the schema of the _rowids table, and insert/query logic.
int pkIsText ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// number of defined vector columns.
int numVectorColumns ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// number of defined PARTITION KEY columns.
int numPartitionColumns ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// number of defined auxiliary columns
int numAuxiliaryColumns ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// number of defined metadata columns
int numMetadataColumns ;
2024-04-20 13:38:58 -07:00
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
// Name of the schema the table exists on.
// Must be freed with sqlite3_free()
char * schemaName ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// Name of the table the table exists on.
// Must be freed with sqlite3_free()
char * tableName ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
// Name of the _rowids shadow table.
// Must be freed with sqlite3_free()
char * shadowRowidsName ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
// Name of the _chunks shadow table.
// Must be freed with sqlite3_free()
char * shadowChunksName ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
// contains enum vec0_user_column_kind values for up to
// numVectorColumns + numPartitionColumns entries
vec0_user_column_kind user_column_kinds [ VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS ] ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
uint8_t user_column_idxs [ VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS ] ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
// Name of all the vector chunk shadow tables.
// Ex '_vector_chunks00'
// Only the first numVectorColumns entries will be available.
// The first numVectorColumns entries must be freed with sqlite3_free()
char * shadowVectorChunksNames [ VEC0_MAX_VECTOR_COLUMNS ] ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
// Name of all rescore chunk shadow tables, ie `_rescore_chunks00`
// Only populated for vector columns with rescore enabled.
// Must be freed with sqlite3_free()
char * shadowRescoreChunksNames [ VEC0_MAX_VECTOR_COLUMNS ] ;
// Name of all rescore vector shadow tables, ie `_rescore_vectors00`
// Rowid-keyed table for fast random-access float vector reads during rescore.
// Only populated for vector columns with rescore enabled.
// Must be freed with sqlite3_free()
char * shadowRescoreVectorsNames [ VEC0_MAX_VECTOR_COLUMNS ] ;
# endif
2026-03-29 19:44:44 -07:00
// Name of all metadata chunk shadow tables, ie `_metadatachunks00`
// Only the first numMetadataColumns entries will be available.
// The first numMetadataColumns entries must be freed with sqlite3_free()
char * shadowMetadataChunksNames [ VEC0_MAX_METADATA_COLUMNS ] ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
struct VectorColumnDefinition vector_columns [ VEC0_MAX_VECTOR_COLUMNS ] ;
struct Vec0PartitionColumnDefinition paritition_columns [ VEC0_MAX_PARTITION_COLUMNS ] ;
struct Vec0AuxiliaryColumnDefinition auxiliary_columns [ VEC0_MAX_AUXILIARY_COLUMNS ] ;
struct Vec0MetadataColumnDefinition metadata_columns [ VEC0_MAX_METADATA_COLUMNS ] ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
int chunk_size ;
2024-06-28 10:51:49 -07:00
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// IVF cached state per vector column
char * shadowIvfCellsNames [ VEC0_MAX_VECTOR_COLUMNS ] ; // table name for blob_open
int ivfTrainedCache [ VEC0_MAX_VECTOR_COLUMNS ] ; // -1=unknown, 0=no, 1=yes
sqlite3_stmt * stmtIvfCellMeta [ VEC0_MAX_VECTOR_COLUMNS ] ; // SELECT n_vectors, length(validity)*8 FROM cells WHERE cell_id=?
sqlite3_stmt * stmtIvfCellUpdateN [ VEC0_MAX_VECTOR_COLUMNS ] ; // UPDATE cells SET n_vectors=n_vectors+? WHERE cell_id=?
sqlite3_stmt * stmtIvfRowidMapInsert [ VEC0_MAX_VECTOR_COLUMNS ] ; // INSERT INTO rowid_map(rowid,cell_id,slot) VALUES(?,?,?)
sqlite3_stmt * stmtIvfRowidMapLookup [ VEC0_MAX_VECTOR_COLUMNS ] ; // SELECT cell_id,slot FROM rowid_map WHERE rowid=?
sqlite3_stmt * stmtIvfRowidMapDelete [ VEC0_MAX_VECTOR_COLUMNS ] ; // DELETE FROM rowid_map WHERE rowid=?
sqlite3_stmt * stmtIvfCentroidsAll [ VEC0_MAX_VECTOR_COLUMNS ] ; // SELECT centroid_id,centroid FROM centroids
# endif
2026-03-29 19:44:44 -07:00
// select latest chunk from _chunks, getting chunk_id
sqlite3_stmt * stmtLatestChunk ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
/**
* Statement to insert a row into the _rowids table , with a rowid .
* Parameters :
* 1 : int64 , rowid to insert
* Result columns : none
* SQL : " INSERT INTO _rowids(rowid) VALUES (?) "
*
* Must be cleaned up with sqlite3_finalize ( ) .
*/
sqlite3_stmt * stmtRowidsInsertRowid ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
/**
* Statement to insert a row into the _rowids table , with an id .
* The id column isn ' t a tradition primary key , but instead a unique
* column to handle " text primary key " vec0 tables . The true int64 rowid
* can be retrieved after inserting with sqlite3_last_rowid ( ) .
*
* Parameters :
* 1 : text or null , id to insert
* Result columns : none
*
* Must be cleaned up with sqlite3_finalize ( ) .
*/
sqlite3_stmt * stmtRowidsInsertId ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
/**
* Statement to update the " position " columns chunk_id and chunk_offset for
* a given _rowids row . Used when the " next available " chunk position is found
* for a vector .
*
* Parameters :
* 1 : int64 , chunk_id value
* 2 : int64 , chunk_offset value
* 3 : int64 , rowid value
* Result columns : none
*
* Must be cleaned up with sqlite3_finalize ( ) .
*/
sqlite3_stmt * stmtRowidsUpdatePosition ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
/**
* Statement to quickly find the chunk_id + chunk_offset of a given row .
* Parameters :
* 1 : rowid of the row / vector to lookup
* Result columns :
* 0 : chunk_id ( i64 )
* 1 : chunk_offset ( i64 )
* SQL : " SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ? " "
*
* Must be cleaned up with sqlite3_finalize ( ) .
*/
sqlite3_stmt * stmtRowidsGetChunkPosition ;
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
// === DiskANN additions ===
# if SQLITE_VEC_ENABLE_DISKANN
// Shadow table names for DiskANN, per vector column
// e.g., "{schema}"."{table}_vectors{00..15}"
char * shadowVectorsNames [ VEC0_MAX_VECTOR_COLUMNS ] ;
// e.g., "{schema}"."{table}_diskann_nodes{00..15}"
char * shadowDiskannNodesNames [ VEC0_MAX_VECTOR_COLUMNS ] ;
// Prepared statements for DiskANN operations (per vector column)
// These will be lazily prepared on first use.
sqlite3_stmt * stmtDiskannNodeRead [ VEC0_MAX_VECTOR_COLUMNS ] ;
sqlite3_stmt * stmtDiskannNodeWrite [ VEC0_MAX_VECTOR_COLUMNS ] ;
sqlite3_stmt * stmtDiskannNodeInsert [ VEC0_MAX_VECTOR_COLUMNS ] ;
sqlite3_stmt * stmtVectorsRead [ VEC0_MAX_VECTOR_COLUMNS ] ;
sqlite3_stmt * stmtVectorsInsert [ VEC0_MAX_VECTOR_COLUMNS ] ;
# endif
2026-03-29 19:44:44 -07:00
} ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
// Forward declarations for rescore functions (defined in sqlite-vec-rescore.c,
// included later after all helpers they depend on are defined).
//
// Only compiled when SQLITE_VEC_ENABLE_RESCORE is non-zero. All of these are
// `static`, so their definitions must end up in this same translation unit.
// Each returns an int; NOTE(review): presumably an SQLite result code like
// the other vec0 helpers - confirm against sqlite-vec-rescore.c.
static int rescore_create_tables ( vec0_vtab * p , sqlite3 * db , char * * pzErr ) ;
static int rescore_drop_tables ( vec0_vtab * p ) ;
static int rescore_new_chunk ( vec0_vtab * p , i64 chunk_rowid ) ;
static int rescore_on_insert ( vec0_vtab * p , i64 chunk_rowid , i64 chunk_offset ,
i64 rowid , void * vectorDatas [ ] ) ;
// NOTE(review): chunk_offset is u64 here but i64 in rescore_on_insert() above.
static int rescore_on_delete ( vec0_vtab * p , i64 chunk_id , u64 chunk_offset , i64 rowid ) ;
static int rescore_delete_chunk ( vec0_vtab * p , i64 chunk_id ) ;
# endif
2026-03-29 19:44:44 -07:00
/**
 * @brief Finalize all the sqlite3_stmt members in a vec0_vtab.
 *
 * Every statement pointer is reset to NULL after being finalized, so this is
 * safe to call more than once. Statements that were never prepared are
 * expected to be NULL; sqlite3_finalize(NULL) is a harmless no-op.
 *
 * The IVF and DiskANN statement arrays are released in independent,
 * separately-guarded loops so that each feature's statements are finalized
 * regardless of whether the other feature is compiled in.
 *
 * @param p vec0_vtab pointer
 */
void vec0_free_resources(vec0_vtab *p) {
  sqlite3_finalize(p->stmtLatestChunk);
  p->stmtLatestChunk = NULL;
  sqlite3_finalize(p->stmtRowidsInsertRowid);
  p->stmtRowidsInsertRowid = NULL;
  sqlite3_finalize(p->stmtRowidsInsertId);
  p->stmtRowidsInsertId = NULL;
  sqlite3_finalize(p->stmtRowidsUpdatePosition);
  p->stmtRowidsUpdatePosition = NULL;
  sqlite3_finalize(p->stmtRowidsGetChunkPosition);
  p->stmtRowidsGetChunkPosition = NULL;

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
  for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
    sqlite3_finalize(p->stmtIvfCellMeta[i]);
    p->stmtIvfCellMeta[i] = NULL;
    sqlite3_finalize(p->stmtIvfCellUpdateN[i]);
    p->stmtIvfCellUpdateN[i] = NULL;
    sqlite3_finalize(p->stmtIvfRowidMapInsert[i]);
    p->stmtIvfRowidMapInsert[i] = NULL;
    sqlite3_finalize(p->stmtIvfRowidMapLookup[i]);
    p->stmtIvfRowidMapLookup[i] = NULL;
    sqlite3_finalize(p->stmtIvfRowidMapDelete[i]);
    p->stmtIvfRowidMapDelete[i] = NULL;
    sqlite3_finalize(p->stmtIvfCentroidsAll[i]);
    p->stmtIvfCentroidsAll[i] = NULL;
  }
#endif

#if SQLITE_VEC_ENABLE_DISKANN
  // Must not be nested inside the IVF block above: DiskANN can be enabled
  // while IVF is disabled, and unfinalized statements would otherwise leak.
  for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
    sqlite3_finalize(p->stmtDiskannNodeRead[i]);
    p->stmtDiskannNodeRead[i] = NULL;
    sqlite3_finalize(p->stmtDiskannNodeWrite[i]);
    p->stmtDiskannNodeWrite[i] = NULL;
    sqlite3_finalize(p->stmtDiskannNodeInsert[i]);
    p->stmtDiskannNodeInsert[i] = NULL;
    sqlite3_finalize(p->stmtVectorsRead[i]);
    p->stmtVectorsRead[i] = NULL;
    sqlite3_finalize(p->stmtVectorsInsert[i]);
    p->stmtVectorsInsert[i] = NULL;
  }
#endif
}
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Release the prepared statements and every sqlite3_free()-managed
 * string owned by a vec0_vtab.
 *
 * Each pointer is set to NULL once released. The vec0_vtab allocation itself
 * is not freed by this function.
 *
 * @param p vec0_vtab pointer
 */
void vec0_free(vec0_vtab *p) {
  int col;

  // statements first, then the strings
  vec0_free_resources(p);

  sqlite3_free(p->schemaName);
  p->schemaName = NULL;
  sqlite3_free(p->tableName);
  p->tableName = NULL;
  sqlite3_free(p->shadowChunksName);
  p->shadowChunksName = NULL;
  sqlite3_free(p->shadowRowidsName);
  p->shadowRowidsName = NULL;

  // per-vector-column shadow table names and the column name itself
  for (col = 0; col < p->numVectorColumns; col++) {
    sqlite3_free(p->shadowVectorChunksNames[col]);
    p->shadowVectorChunksNames[col] = NULL;

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
    sqlite3_free(p->shadowIvfCellsNames[col]);
    p->shadowIvfCellsNames[col] = NULL;
#endif

#if SQLITE_VEC_ENABLE_RESCORE
    sqlite3_free(p->shadowRescoreChunksNames[col]);
    p->shadowRescoreChunksNames[col] = NULL;
    sqlite3_free(p->shadowRescoreVectorsNames[col]);
    p->shadowRescoreVectorsNames[col] = NULL;
#endif

#if SQLITE_VEC_ENABLE_DISKANN
    sqlite3_free(p->shadowVectorsNames[col]);
    p->shadowVectorsNames[col] = NULL;
    sqlite3_free(p->shadowDiskannNodesNames[col]);
    p->shadowDiskannNodesNames[col] = NULL;
#endif

    sqlite3_free(p->vector_columns[col].name);
    p->vector_columns[col].name = NULL;
  }

  // names of the remaining column kinds
  for (col = 0; col < p->numPartitionColumns; col++) {
    sqlite3_free(p->paritition_columns[col].name);
    p->paritition_columns[col].name = NULL;
  }

  for (col = 0; col < p->numAuxiliaryColumns; col++) {
    sqlite3_free(p->auxiliary_columns[col].name);
    p->auxiliary_columns[col].name = NULL;
  }

  for (col = 0; col < p->numMetadataColumns; col++) {
    sqlite3_free(p->metadata_columns[col].name);
    p->metadata_columns[col].name = NULL;
  }
}
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
// When DiskANN support is compiled in, pull its implementation into this
// translation unit. Otherwise provide a stub of vec0_all_columns_diskann()
// that always returns 0, so callers need no #if guards of their own.
// NOTE(review): the non-stub vec0_all_columns_diskann() is presumably defined
// in sqlite-vec-diskann.c - confirm.
# if SQLITE_VEC_ENABLE_DISKANN
# include "sqlite-vec-diskann.c"
# else
static int vec0_all_columns_diskann ( vec0_vtab * p ) { ( void ) p ; return 0 ; }
# endif
2026-03-29 19:44:44 -07:00
/**
 * @brief Total number of user-defined columns on the table: vector,
 * partition key, auxiliary and metadata columns combined.
 *
 * @param p vec0 table
 * @return int number of user-defined columns
 */
int vec0_num_defined_user_columns(vec0_vtab *p) {
  int total = p->numVectorColumns;
  total += p->numPartitionColumns;
  total += p->numAuxiliaryColumns;
  total += p->numMetadataColumns;
  return total;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Returns the index of the hidden "distance" column for the given
 * vec0 table. Hidden columns are numbered relative to the last user-defined
 * column.
 *
 * @param p vec0 table
 * @return int distance column index
 */
int vec0_column_distance_idx(vec0_vtab *p) {
  int last_user_column =
      VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1);
  return last_user_column + VEC0_COLUMN_OFFSET_DISTANCE;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Returns the index of the hidden "k" column for the given vec0
 * table. Hidden columns are numbered relative to the last user-defined
 * column.
 *
 * @param p vec0 table
 * @return int k column index
 */
int vec0_column_k_idx(vec0_vtab *p) {
  int last_user_column =
      VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1);
  return last_user_column + VEC0_COLUMN_OFFSET_K;
}
2026-03-29 19:44:44 -07:00
/**
 * Returns 1 if the given column-based index refers to a user-defined column
 * whose kind is "vector", 0 otherwise (including when the index is outside
 * the range of user-defined columns).
 */
int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) {
  if (column_idx < VEC0_COLUMN_USERN_START) {
    return 0;
  }
  int user_idx = column_idx - VEC0_COLUMN_USERN_START;
  if (user_idx >= vec0_num_defined_user_columns(pVtab)) {
    return 0;
  }
  return pVtab->user_column_kinds[user_idx] == SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
}
2026-03-29 19:44:44 -07:00
/**
 * Maps a user column index to its index among the vector columns.
 * No validation is performed: ONLY call after vec0_column_idx_is_vector()
 * returned 1 for the same column_idx.
 */
int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) {
  int user_idx = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_idxs[user_idx];
}
/**
 * Returns 1 if the given column-based index refers to a user-defined column
 * whose kind is "partition key", 0 otherwise (including when the index is
 * outside the range of user-defined columns).
 */
int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) {
  if (column_idx < VEC0_COLUMN_USERN_START) {
    return 0;
  }
  int user_idx = column_idx - VEC0_COLUMN_USERN_START;
  if (user_idx >= vec0_num_defined_user_columns(pVtab)) {
    return 0;
  }
  return pVtab->user_column_kinds[user_idx] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * Returns the partition column index of the given user column index.
 * No validation is performed: ONLY call if validated with
 * vec0_column_idx_is_partition before.
 */
int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) {
  return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * Returns 1 if the given column-based index refers to a user-defined column
 * whose kind is "auxiliary", 0 otherwise (including when the index is outside
 * the range of user-defined columns).
 */
int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) {
  if (column_idx < VEC0_COLUMN_USERN_START) {
    return 0;
  }
  int user_idx = column_idx - VEC0_COLUMN_USERN_START;
  if (user_idx >= vec0_num_defined_user_columns(pVtab)) {
    return 0;
  }
  return pVtab->user_column_kinds[user_idx] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
}
2026-03-29 19:44:44 -07:00
/**
 * Returns the auxiliary column index of the given user column index.
 * No validation is performed: ONLY call if validated with
 * vec0_column_idx_is_auxiliary before.
 */
int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) {
  return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
}
/**
 * Returns 1 if the given column-based index refers to a user-defined column
 * whose kind is "metadata", 0 otherwise (including when the index is outside
 * the range of user-defined columns).
 */
int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) {
  if (column_idx < VEC0_COLUMN_USERN_START) {
    return 0;
  }
  int user_idx = column_idx - VEC0_COLUMN_USERN_START;
  if (user_idx >= vec0_num_defined_user_columns(pVtab)) {
    return 0;
  }
  return pVtab->user_column_kinds[user_idx] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
}
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
/**
 * Maps a user column index to its index among the metadata columns.
 * No validation is performed: ONLY call after vec0_column_idx_is_metadata()
 * returned 1 for the same column_idx.
 */
int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) {
  int user_idx = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_idxs[user_idx];
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value
 * of a vec0_vtab row with the provided rowid.
 *
 * The lookup statement is prepared on first use and cached on the vtab in
 * stmtRowidsGetChunkPosition; it is reset and its bindings cleared before
 * returning.
 *
 * @param p vec0_vtab
 * @param rowid the rowid of the row to query
 * @param id output, optional sqlite3_value to provide the id.
 *  Useful for text PK rows. Must be freed with sqlite3_value_free().
 *  Only written when a row was found.
 * @param chunk_id output, optional, the chunk_id the row belongs to
 * @param chunk_offset output, optional, the offset within the chunk the row
 *  belongs to
 * @return SQLITE_OK on success, SQLITE_EMPTY if the row does not exist,
 *  SQLITE_NOMEM on allocation failure, other error code otherwise.
 */
int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id,
                            i64 *chunk_id, i64 *chunk_offset) {
  int rc;

  if (!p->stmtRowidsGetChunkPosition) {
    char *zSql =
        sqlite3_mprintf(" SELECT id, chunk_id, chunk_offset "
                        " FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ? ",
                        p->schemaName, p->tableName);
    if (!zSql) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0);
    sqlite3_free(zSql);
    if (rc != SQLITE_OK) {
      vtab_set_error(
          &p->base, VEC_INTERAL_ERROR
          " could not initialize 'rowids get chunk position' statement ");
      goto cleanup;
    }
  }

  sqlite3_bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid);
  rc = sqlite3_step(p->stmtRowidsGetChunkPosition);
  // special case: when no results, return SQLITE_EMPTY to convey "that chunk
  // position doesnt exist"
  if (rc == SQLITE_DONE) {
    rc = SQLITE_EMPTY;
    goto cleanup;
  }
  if (rc != SQLITE_ROW) {
    goto cleanup;
  }

  if (id) {
    // column values are only valid until the statement is reset, so hand the
    // caller its own copy
    sqlite3_value *value =
        sqlite3_column_value(p->stmtRowidsGetChunkPosition, 0);
    *id = sqlite3_value_dup(value);
    if (!*id) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
  }

  if (chunk_id) {
    *chunk_id = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 1);
  }
  if (chunk_offset) {
    *chunk_offset = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 2);
  }

  rc = SQLITE_OK;

cleanup:
  // The statement is still NULL when building or preparing the SQL failed.
  // sqlite3_clear_bindings() is not documented to accept NULL, so guard it.
  if (p->stmtRowidsGetChunkPosition) {
    sqlite3_reset(p->stmtRowidsGetChunkPosition);
    sqlite3_clear_bindings(p->stmtRowidsGetChunkPosition);
  }
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Fetch the id value stored in the _rowids shadow table for the row
 * whose rowid matches. Thin wrapper around vec0_get_chunk_position() that
 * requests only the id.
 *
 * @param pVtab: vec0 table to query
 * @param rowid: rowid of the row to query.
 * @param out: A dup'ed sqlite3_value of the id column. Might be null.
 *  Must be cleaned up with sqlite3_value_free().
 * @returns SQLITE_OK on success, error code on failure
 */
int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
                                 sqlite3_value **out) {
  // PERF: different strategy than get_chunk_position?
  int rc = vec0_get_chunk_position(pVtab, rowid, out, NULL, NULL);
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Look up the rowid of the _rowids shadow table row whose id column
 * equals the given value.
 *
 * @param p vec0 table to query
 * @param valueId id value to search for
 * @param rowid output, written with the matching rowid once a row is found
 * @return SQLITE_OK when exactly one row matched, SQLITE_EMPTY when no row
 *  matched, SQLITE_NOMEM if the query text could not be allocated, otherwise
 *  the code returned by the failing prepare/step call (this includes
 *  SQLITE_ROW when a second matching row exists).
 */
int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
  sqlite3_stmt *lookup = NULL;
  int rc = SQLITE_NOMEM;

  char *sql = sqlite3_mprintf(" SELECT rowid "
                              " FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE id = ? ",
                              p->schemaName, p->tableName);
  if (sql) {
    rc = sqlite3_prepare_v2(p->db, sql, -1, &lookup, NULL);
    sqlite3_free(sql);
  }

  if (rc == SQLITE_OK) {
    sqlite3_bind_value(lookup, 1, valueId);
    rc = sqlite3_step(lookup);
    if (rc == SQLITE_DONE) {
      // no row with that id
      rc = SQLITE_EMPTY;
    } else if (rc == SQLITE_ROW) {
      *rowid = sqlite3_column_int64(lookup, 0);
      // the statement must be exhausted after the first row
      rc = sqlite3_step(lookup);
      if (rc == SQLITE_DONE) {
        rc = SQLITE_OK;
      }
    }
  }

  sqlite3_finalize(lookup);
  return rc;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Set the SQL function/column result to the user-facing id of a row.
 *
 * For tables without a TEXT primary key the id is the rowid itself. Otherwise
 * the id value is fetched via vec0_get_id_value_from_rowid().
 *
 * @param p vec0 table
 * @param context result context to write to
 * @param rowid rowid of the row
 * @return SQLITE_OK once a result (or a nomem error result) was set on the
 *  context, otherwise the error code from the id lookup.
 */
int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
  sqlite3_value *idValue;
  int rc;

  if (!p->pkIsText) {
    sqlite3_result_int64(context, rowid);
    return SQLITE_OK;
  }

  rc = vec0_get_id_value_from_rowid(p, rowid, &idValue);
  if (rc != SQLITE_OK) {
    return rc;
  }

  if (idValue) {
    sqlite3_result_value(context, idValue);
    sqlite3_value_free(idValue);
  } else {
    sqlite3_result_error_nomem(context);
  }
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
* @ brief
*
* @ param pVtab : virtual table to query
* @ param rowid : row to lookup
* @ param vector_column_idx : which vector column to query
* @ param outVector : Output pointer to the vector buffer .
* Must be sqlite3_free ( ) ' ed .
* @ param outVectorSize : Pointer to a int where the size of outVector
* will be stored .
* @ return int SQLITE_OK on success .
*/
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// Forward declaration — defined in sqlite-vec-ivf.c (included later)
static int ivf_get_vector_data ( vec0_vtab * p , i64 rowid , int col_idx ,
void * * outVector , int * outVectorSize ) ;
# endif
2026-03-29 19:44:44 -07:00
int vec0_get_vector_data ( vec0_vtab * pVtab , i64 rowid , int vector_column_idx ,
void * * outVector , int * outVectorSize ) {
vec0_vtab * p = pVtab ;
int rc , brc ;
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# if SQLITE_VEC_ENABLE_DISKANN
// DiskANN fast path: read from _vectors table
if ( p - > vector_columns [ vector_column_idx ] . index_type = = VEC0_INDEX_TYPE_DISKANN ) {
void * vec = NULL ;
int vecSize ;
rc = diskann_vector_read ( p , vector_column_idx , rowid , & vec , & vecSize ) ;
if ( rc ! = SQLITE_OK ) {
vtab_set_error ( & pVtab - > base ,
" Could not fetch vector data for %lld from DiskANN vectors table " ,
rowid ) ;
return SQLITE_ERROR ;
}
* outVector = vec ;
if ( outVectorSize ) * outVectorSize = vecSize ;
return SQLITE_OK ;
}
# endif
2026-03-29 19:44:44 -07:00
i64 chunk_id ;
i64 chunk_offset ;
2026-03-29 19:46:23 -07:00
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// IVF-indexed columns store vectors in _ivf_cells, not _vector_chunks
if ( p - > vector_columns [ vector_column_idx ] . index_type = = VEC0_INDEX_TYPE_IVF ) {
return ivf_get_vector_data ( p , rowid , vector_column_idx , outVector , outVectorSize ) ;
}
# endif
2026-03-29 19:44:44 -07:00
size_t size ;
void * buf = NULL ;
int blobOffset ;
sqlite3_blob * vectorBlob = NULL ;
assert ( ( vector_column_idx > = 0 ) & &
( vector_column_idx < pVtab - > numVectorColumns ) ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
// Rescore columns store float vectors in _rescore_vectors (rowid-keyed)
if ( p - > vector_columns [ vector_column_idx ] . index_type = = VEC0_INDEX_TYPE_RESCORE ) {
size = vector_column_byte_size ( p - > vector_columns [ vector_column_idx ] ) ;
rc = sqlite3_blob_open ( p - > db , p - > schemaName ,
p - > shadowRescoreVectorsNames [ vector_column_idx ] ,
" vector " , rowid , 0 , & vectorBlob ) ;
if ( rc ! = SQLITE_OK ) {
vtab_set_error ( & pVtab - > base ,
" Could not fetch vector data for %lld from rescore vectors " ,
rowid ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
}
buf = sqlite3_malloc ( size ) ;
if ( ! buf ) {
rc = SQLITE_NOMEM ;
goto cleanup ;
}
rc = sqlite3_blob_read ( vectorBlob , buf , size , 0 ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_free ( buf ) ;
buf = NULL ;
rc = SQLITE_ERROR ;
goto cleanup ;
}
* outVector = buf ;
if ( outVectorSize ) {
* outVectorSize = size ;
}
rc = SQLITE_OK ;
goto cleanup ;
}
# endif /* SQLITE_VEC_ENABLE_RESCORE */
2026-03-29 19:44:44 -07:00
rc = vec0_get_chunk_position ( pVtab , rowid , NULL , & chunk_id , & chunk_offset ) ;
if ( rc = = SQLITE_EMPTY ) {
vtab_set_error ( & pVtab - > base , " Could not find a row with rowid %lld " , rowid ) ;
goto cleanup ;
}
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
rc = sqlite3_blob_open ( p - > db , p - > schemaName ,
p - > shadowVectorChunksNames [ vector_column_idx ] ,
" vectors " , chunk_id , 0 , & vectorBlob ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( rc ! = SQLITE_OK ) {
vtab_set_error ( & pVtab - > base ,
" Could not fetch vector data for %lld, opening blob failed " ,
rowid ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
size = vector_column_byte_size ( pVtab - > vector_columns [ vector_column_idx ] ) ;
blobOffset = chunk_offset * size ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
buf = sqlite3_malloc ( size ) ;
if ( ! buf ) {
rc = SQLITE_NOMEM ;
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
rc = sqlite3_blob_read ( vectorBlob , buf , size , blobOffset ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_free ( buf ) ;
buf = NULL ;
vtab_set_error (
& pVtab - > base ,
" Could not fetch vector data for %lld, reading from blob failed " ,
rowid ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
* outVector = buf ;
if ( outVectorSize ) {
* outVectorSize = size ;
}
rc = SQLITE_OK ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
cleanup :
brc = sqlite3_blob_close ( vectorBlob ) ;
if ( ( rc = = SQLITE_OK ) & & ( brc ! = SQLITE_OK ) ) {
vtab_set_error (
& p - > base , VEC_INTERAL_ERROR
" unknown error, could not close vector blob, please file an issue " ) ;
return brc ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
return rc ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Fetch a copy of the value of one partition key column for a row.
 *
 * The row's chunk is located first; the partition value is then read from
 * the chunks shadow table row with that chunk_id.
 *
 * @param pVtab - the vec0_vtab in question
 * @param rowid - rowid of target row
 * @param partition_idx - which partition column to retrieve
 * @param outValue - output sqlite3_value, a sqlite3_value_dup() copy.
 *  Only written when the chunk row was found.
 * @return int - SQLITE_OK on success, otherwise error code
 */
int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value **outValue) {
  i64 chunk_id;
  i64 chunk_offset;
  int rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
  if (rc != SQLITE_OK) {
    return rc;
  }

  char *sql = sqlite3_mprintf(" SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE chunk_id = ? ", partition_idx, pVtab->schemaName, pVtab->tableName);
  if (!sql) {
    return SQLITE_NOMEM;
  }

  sqlite3_stmt *query = NULL;
  rc = sqlite3_prepare_v2(pVtab->db, sql, -1, &query, NULL);
  sqlite3_free(sql);
  if (rc != SQLITE_OK) {
    return rc;
  }

  sqlite3_bind_int64(query, 1, chunk_id);
  if (sqlite3_step(query) == SQLITE_ROW) {
    *outValue = sqlite3_value_dup(sqlite3_column_value(query, 0));
    rc = *outValue ? SQLITE_OK : SQLITE_NOMEM;
  } else {
    // anything but a row (including "no such chunk") is reported as an error
    rc = SQLITE_ERROR;
  }

  sqlite3_finalize(query);
  return rc;
}
2024-06-24 23:26:11 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Fetch a copy of the value of one auxiliary column for a row, read
 * from the auxiliary shadow table by rowid.
 *
 * @param pVtab vec0_vtab
 * @param rowid the rowid of the row to lookup
 * @param auxiliary_idx aux index of the column we care about
 * @param outValue Output sqlite3_value, a sqlite3_value_dup() copy.
 *  Only written when the row was found.
 * @return int SQLITE_OK on success, error code otherwise
 */
int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value **outValue) {
  char *sql = sqlite3_mprintf(" SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ? ", auxiliary_idx, pVtab->schemaName, pVtab->tableName);
  if (!sql) {
    return SQLITE_NOMEM;
  }

  sqlite3_stmt *query = NULL;
  int rc = sqlite3_prepare_v2(pVtab->db, sql, -1, &query, NULL);
  sqlite3_free(sql);
  if (rc != SQLITE_OK) {
    return rc;
  }

  sqlite3_bind_int64(query, 1, rowid);
  if (sqlite3_step(query) == SQLITE_ROW) {
    *outValue = sqlite3_value_dup(sqlite3_column_value(query, 0));
    rc = *outValue ? SQLITE_OK : SQLITE_NOMEM;
  } else {
    // anything but a row (including "no such rowid") is reported as an error
    rc = SQLITE_ERROR;
  }

  sqlite3_finalize(query);
  return rc;
}
2024-11-20 00:30:23 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Result the given metadata value for the given row and metadata column index.
 * Will traverse the metadatachunksNN table with BLOB I/O for the given rowid.
 *
 * The row's chunk position is resolved first; the value is then read from the
 * metadata chunk blob at an offset derived from chunk_offset and the column
 * kind. Text values whose stored length exceeds
 * VEC0_METADATA_TEXT_VIEW_DATA_LENGTH are fetched from the metadata text data
 * shadow table instead.
 *
 * @param p vec0 table
 * @param rowid rowid of the row to read
 * @param metadata_idx which metadata column to read
 * @param context result context the value is written to
 * @return int SQLITE_OK on success, SQLITE_NOMEM on allocation failure,
 *  other error code otherwise (including the code from the chunk position
 *  lookup, eg SQLITE_EMPTY).
 */
int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context *context) {
  int rc;
  i64 chunk_id;
  i64 chunk_offset;
  rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
  if (rc != SQLITE_OK) {
    return rc;
  }
  sqlite3_blob *blobValue = NULL;
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], " data ", chunk_id, 0, &blobValue);
  if (rc != SQLITE_OK) {
    return rc;
  }

  switch (p->metadata_columns[metadata_idx].kind) {
  case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
    // booleans are bit-packed: one bit per row
    u8 block;
    rc = sqlite3_blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT);
    if (rc != SQLITE_OK) {
      goto done;
    }
    int value = block >> ((chunk_offset % CHAR_BIT)) & 1;
    sqlite3_result_int(context, value);
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_INTEGER: {
    i64 value;
    rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
    if (rc != SQLITE_OK) {
      goto done;
    }
    sqlite3_result_int64(context, value);
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_FLOAT: {
    double value;
    rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
    if (rc != SQLITE_OK) {
      goto done;
    }
    sqlite3_result_double(context, value);
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_TEXT: {
    u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
    rc = sqlite3_blob_read(blobValue, view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    if (rc != SQLITE_OK) {
      goto done;
    }
    // The view starts with the text length as an int. Copy it out instead of
    // casting the byte buffer, which would break alignment/aliasing rules.
    int length;
    memcpy(&length, view, sizeof(length));
    if (length < 0) {
      // A negative length would make sqlite3_result_text() scan for a NUL
      // terminator beyond the end of `view`.
      rc = SQLITE_ERROR;
      goto done;
    }
    if (length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
      // short text is stored inline, right after the length
      sqlite3_result_text(context, (const char *)(view + 4), length, SQLITE_TRANSIENT);
    }
    else {
      // long text lives in the metadata text data shadow table
      sqlite3_stmt *stmt = NULL;
      char *zSql = sqlite3_mprintf(" SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ? ", p->schemaName, p->tableName, metadata_idx);
      if (!zSql) {
        rc = SQLITE_NOMEM;
        goto done;
      }
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
      sqlite3_free(zSql);
      if (rc != SQLITE_OK) {
        goto done;
      }
      sqlite3_bind_int64(stmt, 1, rowid);
      rc = sqlite3_step(stmt);
      if (rc != SQLITE_ROW) {
        sqlite3_finalize(stmt);
        rc = SQLITE_ERROR;
        goto done;
      }
      sqlite3_result_value(context, sqlite3_column_value(stmt, 0));
      sqlite3_finalize(stmt);
      rc = SQLITE_OK;
    }
    break;
  }
  }
done:
  // blobValue is read-only, will not fail on close
  sqlite3_blob_close(blobValue);
  return rc;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Look up the largest rowid in the chunks shadow table, optionally
 * restricted to the given partition key values.
 *
 * @param p: vec0 table; p->stmtLatestChunk is prepared on first use and reused
 * @param chunk_rowid: Output pointer, filled with max(rowid) on SQLITE_OK
 * @param partitionKeyValues: one value per partition column, bound in order;
 * only read when p->numPartitionColumns > 0
 * @return int SQLITE_OK on success, SQLITE_EMPTY when max(rowid) is NULL,
 * error code otherwise.
 */
int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value **partitionKeyValues) {
  int rc;

  // lazy initialize stmtLatestChunk when needed. May be cleared during xSync()
  if (p->stmtLatestChunk == NULL) {
    const char *sql;

    if (p->numPartitionColumns > 0) {
      // One "partitionNN = ?" term per partition column, joined with AND.
      sqlite3_str *builder = sqlite3_str_new(NULL);
      sqlite3_str_appendf(builder, " SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE ",
                          p->schemaName, p->tableName);
      for (int col = 0; col < p->numPartitionColumns; col++) {
        if (col > 0) {
          sqlite3_str_appendall(builder, " AND ");
        }
        sqlite3_str_appendf(builder, " partition%02d = ? ", col);
      }
      sql = sqlite3_str_finish(builder);
    } else {
      sql = sqlite3_mprintf(" SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME,
                            p->schemaName, p->tableName);
    }

    if (sql == NULL) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_prepare_v2(p->db, sql, -1, &p->stmtLatestChunk, 0);
    sqlite3_free((void *)sql);
    if (rc != SQLITE_OK) {
      // IMP: V21406_05476
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     " could not initialize 'latest chunk' statement ");
      goto cleanup;
    }
  }

  for (int col = 0; col < p->numPartitionColumns; col++) {
    sqlite3_bind_value(p->stmtLatestChunk, col + 1, partitionKeyValues[col]);
  }

  rc = sqlite3_step(p->stmtLatestChunk);
  if (rc != SQLITE_ROW) {
    // IMP: V31559_15629
    vtab_set_error(&p->base, VEC_INTERAL_ERROR " Could not find latest chunk ");
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  // max() over zero rows yields NULL: report that as "no chunk yet".
  if (sqlite3_column_type(p->stmtLatestChunk, 0) == SQLITE_NULL) {
    rc = SQLITE_EMPTY;
    goto cleanup;
  }
  *chunk_rowid = sqlite3_column_int64(p->stmtLatestChunk, 0);

  // Step once more so the statement runs to completion.
  rc = sqlite3_step(p->stmtLatestChunk);
  if (rc == SQLITE_DONE) {
    rc = SQLITE_OK;
  } else {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   " unknown result code when closing out stmtLatestChunk. "
                   " Please file an issue: " REPORT_URL,
                   p->schemaName, p->shadowChunksName);
  }

cleanup:
  if (p->stmtLatestChunk) {
    sqlite3_reset(p->stmtLatestChunk);
    sqlite3_clear_bindings(p->stmtLatestChunk);
  }
  return rc;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Insert a row with an explicit rowid into the rowids shadow table.
 *
 * p->stmtRowidsInsertRowid is prepared on first use and reused afterwards.
 * When SQLITE_THREADSAFE is set and the mutex API is available, the database
 * connection mutex is held across the bind/step/error-reporting sequence.
 *
 * @param p: vec0 table to insert into
 * @param rowid: rowid value for the new row
 * @return int SQLITE_OK on success, SQLITE_NOMEM on allocation failure, the
 * sqlite3_prepare_v2() code if preparation fails, or SQLITE_ERROR (with the
 * vtab error message set) if the insert fails.
 */
int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) {
  int rc = SQLITE_OK;
  int entered = 0;
  UNUSED_PARAMETER(entered); // temporary
  if (!p->stmtRowidsInsertRowid) {
    const char *zSql =
        sqlite3_mprintf(" INSERT INTO " VEC0_SHADOW_ROWIDS_NAME " (rowid) "
                        " VALUES (?); ",
                        p->schemaName, p->tableName);
    if (!zSql) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0);
    sqlite3_free((void *)zSql);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     " could not initialize 'insert rowids' statement ");
      goto cleanup;
    }
  }

#if SQLITE_THREADSAFE
  if (sqlite3_mutex_enter) {
    sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
    entered = 1;
  }
#endif
  sqlite3_bind_int64(p->stmtRowidsInsertRowid, 1, rowid);
  rc = sqlite3_step(p->stmtRowidsInsertRowid);

  if (rc != SQLITE_DONE) {
    if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY) {
      // IMP: V17090_01160
      vtab_set_error(&p->base, " UNIQUE constraint failed on %s primary key ",
                     p->tableName);
    } else {
      // IMP: V04679_21517
      // Report the error from the connection this statement ran on. The
      // message must not be looked up through p->stmtRowidsInsertId: that is
      // a different statement and may not have been prepared yet.
      vtab_set_error(&p->base,
                     " Error inserting rowid into rowids shadow table: %s ",
                     sqlite3_errmsg(p->db));
    }
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  rc = SQLITE_OK;

cleanup:
  if (p->stmtRowidsInsertRowid) {
    sqlite3_reset(p->stmtRowidsInsertRowid);
    sqlite3_clear_bindings(p->stmtRowidsInsertRowid);
  }

#if SQLITE_THREADSAFE
  // Only leave the mutex if it was actually entered above.
  if (sqlite3_mutex_leave && entered) {
    sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  }
#endif
  return rc;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Insert a row keyed by `id` into the rowids shadow table and report
 * the rowid SQLite assigned to it.
 *
 * p->stmtRowidsInsertId is prepared on first use and reused afterwards. When
 * SQLITE_THREADSAFE is set and the mutex API is available, the database
 * connection mutex is held from the bind until after
 * sqlite3_last_insert_rowid() has been read.
 *
 * @param p: vec0 table to insert into
 * @param idValue: value bound to the id column; nothing is bound when NULL
 * @param rowid: Output pointer, filled with sqlite3_last_insert_rowid() on
 * success
 * @return int SQLITE_OK on success, SQLITE_NOMEM on allocation failure, the
 * sqlite3_prepare_v2() code if preparation fails, or SQLITE_ERROR (with the
 * vtab error message set) if the insert fails.
 */
int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) {
  int rc = SQLITE_OK;
  int entered = 0;
  UNUSED_PARAMETER(entered); // temporary

  if (p->stmtRowidsInsertId == NULL) {
    const char *sql =
        sqlite3_mprintf(" INSERT INTO " VEC0_SHADOW_ROWIDS_NAME " (id) "
                        " VALUES (?); ",
                        p->schemaName, p->tableName);
    if (sql == NULL) {
      rc = SQLITE_NOMEM;
      goto finish;
    }
    rc = sqlite3_prepare_v2(p->db, sql, -1, &p->stmtRowidsInsertId, 0);
    sqlite3_free((void *)sql);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     " could not initialize 'insert rowids id' statement ");
      goto finish;
    }
  }

#if SQLITE_THREADSAFE
  if (sqlite3_mutex_enter) {
    sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
    entered = 1;
  }
#endif

  if (idValue != NULL) {
    sqlite3_bind_value(p->stmtRowidsInsertId, 1, idValue);
  }

  rc = sqlite3_step(p->stmtRowidsInsertId);
  if (rc == SQLITE_DONE) {
    *rowid = sqlite3_last_insert_rowid(p->db);
    rc = SQLITE_OK;
  } else {
    if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE) {
      // IMP: V20497_04568
      vtab_set_error(&p->base, " UNIQUE constraint failed on %s primary key ",
                     p->tableName);
    } else {
      // IMP: V24016_08086
      // IMP: V15177_32015
      vtab_set_error(&p->base,
                     " Error inserting id into rowids shadow table: %s ",
                     sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
    }
    rc = SQLITE_ERROR;
  }

finish:
  if (p->stmtRowidsInsertId) {
    sqlite3_reset(p->stmtRowidsInsertId);
    sqlite3_clear_bindings(p->stmtRowidsInsertId);
  }

#if SQLITE_THREADSAFE
  // Only leave the mutex if it was actually entered above.
  if (sqlite3_mutex_leave && entered) {
    sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  }
#endif
  return rc;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Size in bytes of one metadata chunk blob for a column of the given
 * kind.
 *
 * @param kind: metadata column kind
 * @param chunk_size: number of rows per chunk
 * @return int byte size of the chunk blob, or 0 for an unrecognized kind.
 */
int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) {
  int nBytes = 0;

  switch (kind) {
  case VEC0_METADATA_COLUMN_KIND_BOOLEAN:
    // one bit per row
    nBytes = chunk_size / 8;
    break;
  case VEC0_METADATA_COLUMN_KIND_INTEGER:
    nBytes = chunk_size * sizeof(i64);
    break;
  case VEC0_METADATA_COLUMN_KIND_FLOAT:
    nBytes = chunk_size * sizeof(double);
    break;
  case VEC0_METADATA_COLUMN_KIND_TEXT:
    // one fixed-size view buffer per row
    nBytes = chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
    break;
  }

  return nBytes;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Record the chunk position (chunk_id and chunk_offset) of a row in
 * the rowids shadow table.
 *
 * p->stmtRowidsUpdatePosition is prepared on first use and reused afterwards.
 *
 * @param p: vec0 table to update
 * @param rowid: rowid of the row to update
 * @param chunk_rowid: value stored in the chunk_id column
 * @param chunk_offset: value stored in the chunk_offset column
 * @return int SQLITE_OK on success, SQLITE_NOMEM on allocation failure, the
 * sqlite3_prepare_v2() code if preparation fails, or SQLITE_ERROR (with the
 * vtab error message set) if the update fails.
 */
int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid,
                                i64 chunk_offset) {
  int rc = SQLITE_OK;

  if (p->stmtRowidsUpdatePosition == NULL) {
    const char *sql = sqlite3_mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME
                                      " SET chunk_id = ?, chunk_offset = ? "
                                      " WHERE rowid = ? ",
                                      p->schemaName, p->tableName);
    if (sql == NULL) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_prepare_v2(p->db, sql, -1, &p->stmtRowidsUpdatePosition, 0);
    sqlite3_free((void *)sql);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     " could not initialize 'update rowids position' statement ");
      goto cleanup;
    }
  }

  sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid);
  sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset);
  sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 3, rowid);

  if (sqlite3_step(p->stmtRowidsUpdatePosition) == SQLITE_DONE) {
    rc = SQLITE_OK;
  } else {
    // IMP: V21925_05995
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   " could not update rowids position for rowid=%lld, "
                   " chunk_rowid=%lld, chunk_offset=%lld ",
                   rowid, chunk_rowid, chunk_offset);
    rc = SQLITE_ERROR;
  }

cleanup:
  if (p->stmtRowidsUpdatePosition) {
    sqlite3_reset(p->stmtRowidsUpdatePosition);
    sqlite3_clear_bindings(p->stmtRowidsUpdatePosition);
  }

  return rc;
}
2024-10-11 09:09:32 -07:00
/**
2026-03-29 19:44:44 -07:00
* @ brief Adds a new chunk for the vec0 table , and the corresponding vector
* chunks .
2024-10-11 09:09:32 -07:00
*
2026-03-29 19:44:44 -07:00
* Inserts a new row into the _chunks table , with blank data , and uses that new
* rowid to insert new blank rows into _vector_chunksXX tables .
*
* @ param p : vec0 table to add new chunk
* @ param paritionKeyValues : Array of partition key valeus for the new chunk , if available
* @ param chunk_rowid : Output pointer , if not NULL , then will be filled with the
* new chunk rowid .
* @ return int SQLITE_OK on success , error code otherwise .
2024-10-11 09:09:32 -07:00
*/
2026-03-29 19:44:44 -07:00
int vec0_new_chunk ( vec0_vtab * p , sqlite3_value * * partitionKeyValues , i64 * chunk_rowid ) {
int rc ;
char * zSql ;
sqlite3_stmt * stmt ;
i64 rowid ;
2024-06-24 23:26:11 -07:00
2026-03-29 19:44:44 -07:00
// Step 1: Insert a new row in _chunks, capture that new rowid
if ( p - > numPartitionColumns > 0 ) {
sqlite3_str * s = sqlite3_str_new ( NULL ) ;
sqlite3_str_appendf ( s , " INSERT INTO " VEC0_SHADOW_CHUNKS_NAME , p - > schemaName , p - > tableName ) ;
sqlite3_str_appendall ( s , " (size, validity, rowids " ) ;
for ( int i = 0 ; i < p - > numPartitionColumns ; i + + ) {
sqlite3_str_appendf ( s , " , partition%02d " , i ) ;
}
sqlite3_str_appendall ( s , " ) VALUES (?, ?, ? " ) ;
for ( int i = 0 ; i < p - > numPartitionColumns ; i + + ) {
sqlite3_str_appendall ( s , " , ? " ) ;
}
sqlite3_str_appendall ( s , " ) " ) ;
2024-06-25 08:52:48 -07:00
2026-03-29 19:44:44 -07:00
zSql = sqlite3_str_finish ( s ) ;
} else {
zSql = sqlite3_mprintf ( " INSERT INTO " VEC0_SHADOW_CHUNKS_NAME
" (size, validity, rowids) "
" VALUES (?, ?, ?); " ,
p - > schemaName , p - > tableName ) ;
2024-06-24 23:26:11 -07:00
}
2026-03-03 17:35:41 -08:00
2026-03-29 19:44:44 -07:00
if ( ! zSql ) {
return SQLITE_NOMEM ;
2026-03-03 17:35:41 -08:00
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , NULL ) ;
sqlite3_free ( zSql ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_finalize ( stmt ) ;
return rc ;
2026-03-03 17:35:41 -08:00
}
2026-03-29 19:44:44 -07:00
# if SQLITE_THREADSAFE
if ( sqlite3_mutex_enter ) {
sqlite3_mutex_enter ( sqlite3_db_mutex ( p - > db ) ) ;
2026-03-03 17:35:41 -08:00
}
2026-03-29 19:44:44 -07:00
# endif
2024-06-24 23:26:11 -07:00
2026-03-29 19:44:44 -07:00
sqlite3_bind_int64 ( stmt , 1 , p - > chunk_size ) ; // size
sqlite3_bind_zeroblob ( stmt , 2 , p - > chunk_size / CHAR_BIT ) ; // validity bitmap
sqlite3_bind_zeroblob ( stmt , 3 , p - > chunk_size * sizeof ( i64 ) ) ; // rowids
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < p - > numPartitionColumns ; i + + ) {
sqlite3_bind_value ( stmt , 4 + i , partitionKeyValues [ i ] ) ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
rc = sqlite3_step ( stmt ) ;
int failed = rc ! = SQLITE_DONE ;
rowid = sqlite3_last_insert_rowid ( p - > db ) ;
# if SQLITE_THREADSAFE
if ( sqlite3_mutex_leave ) {
sqlite3_mutex_leave ( sqlite3_db_mutex ( p - > db ) ) ;
}
# endif
sqlite3_finalize ( stmt ) ;
if ( failed ) {
return SQLITE_ERROR ;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// Step 2: Create new vector chunks for each vector column, with
// that new chunk_rowid.
//
// SHADOW_TABLE_ROWID_QUIRK: The _vector_chunksNN and _metadatachunksNN
// shadow tables declare "rowid PRIMARY KEY" without the INTEGER type, so
// the user-defined "rowid" column is NOT an alias for the internal SQLite
// rowid (_rowid_). When only appending rows these two happen to stay in
// sync, but after a chunk is deleted (vec0Update_Delete_DeleteChunkIfEmpty)
// and a new one is created, the auto-assigned _rowid_ can diverge from the
// user "rowid" value. Since sqlite3_blob_open() addresses rows by internal
// _rowid_, we must explicitly set BOTH _rowid_ and "rowid" to the same
// value so that later blob operations can find the row.
//
// The correct long-term fix is changing the schema to
// "rowid INTEGER PRIMARY KEY"
// which makes it a true alias, but that would break existing databases.
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < vec0_num_defined_user_columns ( p ) ; i + + ) {
if ( p - > user_column_kinds [ i ] ! = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR ) {
continue ;
}
int vector_column_idx = p - > user_column_idxs [ i ] ;
2026-03-29 19:45:54 -07:00
2026-03-31 13:51:08 -07:00
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
if ( p - > vector_columns [ vector_column_idx ] . index_type ! = VEC0_INDEX_TYPE_FLAT ) {
2026-03-29 19:45:54 -07:00
continue ;
}
2026-03-29 19:44:44 -07:00
i64 vectorsSize =
p - > chunk_size * vector_column_byte_size ( p - > vector_columns [ vector_column_idx ] ) ;
2024-11-20 00:30:23 -08:00
2026-03-29 19:44:44 -07:00
// See SHADOW_TABLE_ROWID_QUIRK above for why _rowid_ and rowid are both set.
zSql = sqlite3_mprintf ( " INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME
" (_rowid_, rowid, vectors) "
" VALUES (?, ?, ?) " ,
p - > schemaName , p - > tableName , vector_column_idx ) ;
if ( ! zSql ) {
return SQLITE_NOMEM ;
}
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , NULL ) ;
sqlite3_free ( zSql ) ;
2024-11-20 00:30:23 -08:00
2026-03-29 19:44:44 -07:00
if ( rc ! = SQLITE_OK ) {
sqlite3_finalize ( stmt ) ;
return rc ;
}
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
sqlite3_bind_int64 ( stmt , 1 , rowid ) ; // _rowid_ (internal SQLite rowid)
sqlite3_bind_int64 ( stmt , 2 , rowid ) ; // rowid (user-defined column)
sqlite3_bind_zeroblob64 ( stmt , 3 , vectorsSize ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
rc = sqlite3_step ( stmt ) ;
sqlite3_finalize ( stmt ) ;
if ( rc ! = SQLITE_DONE ) {
return rc ;
}
}
2024-07-23 08:59:34 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
// Create new rescore chunks for each rescore-enabled vector column
rc = rescore_new_chunk ( p , rowid ) ;
if ( rc ! = SQLITE_OK ) {
return rc ;
}
# endif
2026-03-29 19:44:44 -07:00
// Step 3: Create new metadata chunks for each metadata column
for ( int i = 0 ; i < vec0_num_defined_user_columns ( p ) ; i + + ) {
if ( p - > user_column_kinds [ i ] ! = SQLITE_VEC0_USER_COLUMN_KIND_METADATA ) {
continue ;
}
int metadata_column_idx = p - > user_column_idxs [ i ] ;
// See SHADOW_TABLE_ROWID_QUIRK above for why _rowid_ and rowid are both set.
zSql = sqlite3_mprintf ( " INSERT INTO " VEC0_SHADOW_METADATA_N_NAME
" (_rowid_, rowid, data) "
" VALUES (?, ?, ?) " ,
p - > schemaName , p - > tableName , metadata_column_idx ) ;
2024-07-23 08:59:34 -07:00
if ( ! zSql ) {
2026-03-29 19:44:44 -07:00
return SQLITE_NOMEM ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , NULL ) ;
sqlite3_free ( zSql ) ;
2024-07-23 08:59:34 -07:00
if ( rc ! = SQLITE_OK ) {
2026-03-29 19:44:44 -07:00
sqlite3_finalize ( stmt ) ;
return rc ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_bind_int64 ( stmt , 1 , rowid ) ; // _rowid_ (internal SQLite rowid)
sqlite3_bind_int64 ( stmt , 2 , rowid ) ; // rowid (user-defined column)
sqlite3_bind_zeroblob64 ( stmt , 3 , vec0_metadata_chunk_size ( p - > metadata_columns [ metadata_column_idx ] . kind , p - > chunk_size ) ) ;
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
rc = sqlite3_step ( stmt ) ;
sqlite3_finalize ( stmt ) ;
if ( rc ! = SQLITE_DONE ) {
return rc ;
2024-07-23 08:59:34 -07:00
}
}
2026-03-29 19:44:44 -07:00
if ( chunk_rowid ) {
* chunk_rowid = rowid ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
return SQLITE_OK ;
}
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
// Per-query state referenced by vec0_cursor.fullscan_data.
struct vec0_query_fullscan_data {
// Statement owned by this struct; finalized by
// vec0_query_fullscan_data_clear().
sqlite3_stmt * rowids_stmt ;
// NOTE(review): presumably set once the scan has no more rows — confirm
// against the cursor code that reads it.
i8 done ;
} ;
void vec0_query_fullscan_data_clear (
struct vec0_query_fullscan_data * fullscan_data ) {
if ( ! fullscan_data )
return ;
if ( fullscan_data - > rowids_stmt ) {
sqlite3_finalize ( fullscan_data - > rowids_stmt ) ;
fullscan_data - > rowids_stmt = NULL ;
}
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
// Per-query state referenced by vec0_cursor.knn_data.
struct vec0_query_knn_data {
// Size of the rowids and distances arrays below.
i64 k ;
// NOTE(review): presumably the number of entries actually filled in
// (<= k) — confirm against the code that populates this struct.
i64 k_used ;
// Array of rowids of size k. Must be freed with sqlite3_free().
i64 * rowids ;
// Array of distances of size k. Must be freed with sqlite3_free().
f32 * distances ;
// NOTE(review): presumably the cursor's current position within the
// result arrays — confirm against the cursor code.
i64 current_idx ;
} ;
void vec0_query_knn_data_clear ( struct vec0_query_knn_data * knn_data ) {
if ( ! knn_data )
return ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( knn_data - > rowids ) {
sqlite3_free ( knn_data - > rowids ) ;
knn_data - > rowids = NULL ;
2024-06-28 15:29:13 -07:00
}
2026-03-29 19:44:44 -07:00
if ( knn_data - > distances ) {
sqlite3_free ( knn_data - > distances ) ;
knn_data - > distances = NULL ;
2024-06-28 15:29:13 -07:00
}
}
2026-03-29 19:44:44 -07:00
// Per-query state referenced by vec0_cursor.point_data.
struct vec0_query_point_data {
i64 rowid ;
// One slot per possible vector column; each entry is released with
// sqlite3_free() by vec0_query_point_data_clear().
void * vectors [ VEC0_MAX_VECTOR_COLUMNS ] ;
// NOTE(review): presumably set once the single row has been returned —
// confirm against the cursor code that reads it.
int done ;
} ;
void vec0_query_point_data_clear ( struct vec0_query_point_data * point_data ) {
if ( ! point_data )
return ;
for ( int i = 0 ; i < VEC0_MAX_VECTOR_COLUMNS ; i + + ) {
sqlite3_free ( point_data - > vectors [ i ] ) ;
point_data - > vectors [ i ] = NULL ;
2024-04-20 13:38:58 -07:00
}
}
2026-03-29 19:44:44 -07:00
// Identifies which kind of query a vec0_cursor is running (see
// vec0_cursor.query_plan). Each enumerator is given a character-constant
// value.
typedef enum {
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
VEC0_QUERY_PLAN_FULLSCAN = ' 1 ' ,
VEC0_QUERY_PLAN_POINT = ' 2 ' ,
VEC0_QUERY_PLAN_KNN = ' 3 ' ,
} vec0_query_plan ;
2024-06-28 15:29:13 -07:00
2026-03-29 19:44:44 -07:00
// Cursor object for the vec0 virtual table.
typedef struct vec0_cursor vec0_cursor ;
struct vec0_cursor {
// Embedded SQLite cursor base object.
sqlite3_vtab_cursor base ;
2024-06-28 15:29:13 -07:00
2026-03-29 19:44:44 -07:00
// Which kind of query this cursor is running.
vec0_query_plan query_plan ;
// Per-plan query state. Any pointer that is non-NULL is cleared, freed with
// sqlite3_free() and reset to NULL by vec0_cursor_clear().
struct vec0_query_fullscan_data * fullscan_data ;
struct vec0_query_knn_data * knn_data ;
struct vec0_query_point_data * point_data ;
} ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Release all per-query state attached to a cursor and reset the
 * corresponding pointers to NULL. The cursor itself is not freed.
 *
 * @param pCur: cursor to clear; must not be NULL
 */
void vec0_cursor_clear(vec0_cursor *pCur) {
  // Each *_clear() helper returns immediately on NULL and sqlite3_free() is a
  // no-op on NULL, so the members can be released unconditionally.
  vec0_query_fullscan_data_clear(pCur->fullscan_data);
  sqlite3_free(pCur->fullscan_data);
  pCur->fullscan_data = NULL;

  vec0_query_knn_data_clear(pCur->knn_data);
  sqlite3_free(pCur->knn_data);
  pCur->knn_data = NULL;

  vec0_query_point_data_clear(pCur->point_data);
  sqlite3_free(pCur->point_data);
  pCur->point_data = NULL;
}
2026-03-29 19:46:23 -07:00
// IVF index implementation — #include'd here after all struct/helper definitions
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
# include "sqlite-vec-ivf-kmeans.c"
# include "sqlite-vec-ivf.c"
# endif
2026-03-29 19:44:44 -07:00
# define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: "
static int vec0_init ( sqlite3 * db , void * pAux , int argc , const char * const * argv ,
sqlite3_vtab * * ppVtab , char * * pzErr , bool isCreate ) {
UNUSED_PARAMETER ( pAux ) ;
vec0_vtab * pNew ;
2024-11-20 00:02:04 -08:00
int rc ;
2026-03-29 19:44:44 -07:00
const char * zSql ;
pNew = sqlite3_malloc ( sizeof ( * pNew ) ) ;
if ( pNew = = 0 )
2024-11-20 00:02:04 -08:00
return SQLITE_NOMEM ;
2026-03-29 19:44:44 -07:00
memset ( pNew , 0 , sizeof ( * pNew ) ) ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// Declared chunk_size=N for entire table.
// -1 to use the default, otherwise will get re-assigned on `chunk_size=N`
// option
int chunk_size = - 1 ;
int numVectorColumns = 0 ;
int numPartitionColumns = 0 ;
int numAuxiliaryColumns = 0 ;
int numMetadataColumns = 0 ;
int user_column_idx = 0 ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// track if a "primary key" column is defined
char * pkColumnName = NULL ;
int pkColumnNameLength ;
int pkColumnType = SQLITE_INTEGER ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
for ( int i = 3 ; i < argc ; i + + ) {
struct VectorColumnDefinition vecColumn ;
struct Vec0PartitionColumnDefinition partitionColumn ;
struct Vec0AuxiliaryColumnDefinition auxColumn ;
struct Vec0MetadataColumnDefinition metadataColumn ;
char * cName = NULL ;
int cNameLength ;
int cType ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
// Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]`
rc = vec0_parse_vector_column ( argv [ i ] , strlen ( argv [ i ] ) , & vecColumn ) ;
if ( rc = = SQLITE_ERROR ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR " could not parse vector column '%s' " , argv [ i ] ) ;
goto error ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
if ( rc = = SQLITE_OK ) {
if ( numVectorColumns > = VEC0_MAX_VECTOR_COLUMNS ) {
sqlite3_free ( vecColumn . name ) ;
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" Too many provided vector columns, maximum %d " ,
VEC0_MAX_VECTOR_COLUMNS ) ;
goto error ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
if ( vecColumn . dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS ) {
sqlite3_free ( vecColumn . name ) ;
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" Dimension on vector column too large, provided %lld, maximum %lld " ,
( i64 ) vecColumn . dimensions , SQLITE_VEC_VEC0_MAX_DIMENSIONS ) ;
goto error ;
2024-11-20 00:59:34 -08:00
}
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
// DiskANN validation
if ( vecColumn . index_type = = VEC0_INDEX_TYPE_DISKANN ) {
if ( vecColumn . element_type = = SQLITE_VEC_ELEMENT_TYPE_BIT ) {
sqlite3_free ( vecColumn . name ) ;
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" DiskANN index is not supported on bit vector columns " ) ;
goto error ;
}
if ( vecColumn . diskann . quantizer_type = = VEC0_DISKANN_QUANTIZER_BINARY & &
( vecColumn . dimensions % CHAR_BIT ) ! = 0 ) {
sqlite3_free ( vecColumn . name ) ;
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" DiskANN with binary quantizer requires dimensions divisible by 8 " ) ;
goto error ;
}
}
2026-03-29 19:44:44 -07:00
pNew - > user_column_kinds [ user_column_idx ] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR ;
pNew - > user_column_idxs [ user_column_idx ] = numVectorColumns ;
memcpy ( & pNew - > vector_columns [ numVectorColumns ] , & vecColumn , sizeof ( vecColumn ) ) ;
numVectorColumns + + ;
pNew - > numVectorColumns = numVectorColumns ;
user_column_idx + + ;
continue ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
// Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key`
rc = vec0_parse_partition_key_definition ( argv [ i ] , strlen ( argv [ i ] ) , & cName ,
& cNameLength , & cType ) ;
if ( rc = = SQLITE_OK ) {
if ( numPartitionColumns > = VEC0_MAX_PARTITION_COLUMNS ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" More than %d partition key columns were provided " ,
VEC0_MAX_PARTITION_COLUMNS ) ;
goto error ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
partitionColumn . type = cType ;
partitionColumn . name_length = cNameLength ;
partitionColumn . name = sqlite3_mprintf ( " %.*s " , cNameLength , cName ) ;
if ( ! partitionColumn . name ) {
rc = SQLITE_NOMEM ;
goto error ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
pNew - > user_column_kinds [ user_column_idx ] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION ;
pNew - > user_column_idxs [ user_column_idx ] = numPartitionColumns ;
memcpy ( & pNew - > paritition_columns [ numPartitionColumns ] , & partitionColumn , sizeof ( partitionColumn ) ) ;
numPartitionColumns + + ;
pNew - > numPartitionColumns = numPartitionColumns ;
user_column_idx + + ;
continue ;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key`
rc = vec0_parse_primary_key_definition ( argv [ i ] , strlen ( argv [ i ] ) , & cName ,
& cNameLength , & cType ) ;
if ( rc = = SQLITE_OK ) {
if ( pkColumnName ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" More than one primary key definition was provided, vec0 only "
" suports a single primary key column " ,
argv [ i ] ) ;
goto error ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
pkColumnName = cName ;
pkColumnNameLength = cNameLength ;
pkColumnType = cType ;
continue ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
// Scenario #4: Constructor argument is a auxiliary column definition, ie `+contents text`
rc = vec0_parse_auxiliary_column_definition ( argv [ i ] , strlen ( argv [ i ] ) , & cName ,
& cNameLength , & cType ) ;
if ( rc = = SQLITE_OK ) {
if ( numAuxiliaryColumns > = VEC0_MAX_AUXILIARY_COLUMNS ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" More than %d auxiliary columns were provided " ,
VEC0_MAX_AUXILIARY_COLUMNS ) ;
goto error ;
}
auxColumn . type = cType ;
auxColumn . name_length = cNameLength ;
auxColumn . name = sqlite3_mprintf ( " %.*s " , cNameLength , cName ) ;
if ( ! auxColumn . name ) {
rc = SQLITE_NOMEM ;
goto error ;
}
pNew - > user_column_kinds [ user_column_idx ] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY ;
pNew - > user_column_idxs [ user_column_idx ] = numAuxiliaryColumns ;
memcpy ( & pNew - > auxiliary_columns [ numAuxiliaryColumns ] , & auxColumn , sizeof ( auxColumn ) ) ;
numAuxiliaryColumns + + ;
pNew - > numAuxiliaryColumns = numAuxiliaryColumns ;
user_column_idx + + ;
continue ;
2024-07-23 08:59:34 -07:00
}
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
vec0_metadata_column_kind kind ;
rc = vec0_parse_metadata_column_definition ( argv [ i ] , strlen ( argv [ i ] ) , & cName ,
& cNameLength , & kind ) ;
if ( rc = = SQLITE_OK ) {
if ( numMetadataColumns > = VEC0_MAX_METADATA_COLUMNS ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" More than %d metadata columns were provided " ,
VEC0_MAX_METADATA_COLUMNS ) ;
goto error ;
}
metadataColumn . kind = kind ;
metadataColumn . name_length = cNameLength ;
metadataColumn . name = sqlite3_mprintf ( " %.*s " , cNameLength , cName ) ;
if ( ! metadataColumn . name ) {
rc = SQLITE_NOMEM ;
goto error ;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
pNew - > user_column_kinds [ user_column_idx ] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA ;
pNew - > user_column_idxs [ user_column_idx ] = numMetadataColumns ;
memcpy ( & pNew - > metadata_columns [ numMetadataColumns ] , & metadataColumn , sizeof ( metadataColumn ) ) ;
numMetadataColumns + + ;
pNew - > numMetadataColumns = numMetadataColumns ;
user_column_idx + + ;
continue ;
}
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
// Scenario #4: Constructor argument is a table-level option, ie `chunk_size`
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
char * key ;
char * value ;
int keyLength , valueLength ;
rc = vec0_parse_table_option ( argv [ i ] , strlen ( argv [ i ] ) , & key , & keyLength ,
& value , & valueLength ) ;
if ( rc = = SQLITE_ERROR ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR " could not parse table option '%s' " , argv [ i ] ) ;
goto error ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
if ( rc = = SQLITE_OK ) {
if ( sqlite3_strnicmp ( key , " chunk_size " , keyLength ) = = 0 ) {
chunk_size = atoi ( value ) ;
if ( chunk_size < = 0 ) {
// IMP: V01931_18769
* pzErr =
sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" chunk_size must be a non-zero positive integer " ) ;
goto error ;
}
if ( ( chunk_size % 8 ) ! = 0 ) {
// IMP: V14110_30948
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" chunk_size must be divisible by 8 " ) ;
goto error ;
}
# define SQLITE_VEC_CHUNK_SIZE_MAX 4096
if ( chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX ) {
* pzErr =
sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR " chunk_size too large " ) ;
goto error ;
}
} else {
// IMP: V27642_11712
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR " Unknown table option: %.*s " , keyLength , key ) ;
goto error ;
}
continue ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
// Scenario #5: Unknown constructor argument
* pzErr =
sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR " Could not parse '%s' " , argv [ i ] ) ;
goto error ;
2024-07-23 23:57:42 -07:00
}
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
if ( chunk_size < 0 ) {
chunk_size = 1024 ;
2024-07-23 23:57:42 -07:00
}
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
if ( numVectorColumns < = 0 ) {
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" At least one vector column is required " ) ;
goto error ;
2024-07-23 23:57:42 -07:00
}
2024-07-23 08:59:34 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
{
int hasRescore = 0 ;
for ( int i = 0 ; i < numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type = = VEC0_INDEX_TYPE_RESCORE ) {
hasRescore = 1 ;
break ;
}
}
if ( hasRescore ) {
if ( numMetadataColumns > 0 ) {
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" Metadata columns are not supported with rescore indexes " ) ;
goto error ;
}
if ( numPartitionColumns > 0 ) {
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" Partition key columns are not supported with rescore indexes " ) ;
goto error ;
}
}
}
# endif
2026-03-29 19:46:23 -07:00
// IVF indexes do not support auxiliary, metadata, or partition key columns.
{
int has_ivf = 0 ;
for ( int i = 0 ; i < numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type = = VEC0_INDEX_TYPE_IVF ) {
has_ivf = 1 ;
break ;
}
}
if ( has_ivf ) {
if ( numPartitionColumns > 0 ) {
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" partition key columns are not supported with IVF indexes " ) ;
goto error ;
}
if ( numMetadataColumns > 0 ) {
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" metadata columns are not supported with IVF indexes " ) ;
goto error ;
}
}
}
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
// DiskANN columns cannot coexist with aux/metadata/partition columns
for ( int i = 0 ; i < numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type = = VEC0_INDEX_TYPE_DISKANN ) {
if ( numMetadataColumns > 0 ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" Metadata columns are not supported with DiskANN-indexed vector columns " ) ;
goto error ;
}
if ( numPartitionColumns > 0 ) {
* pzErr = sqlite3_mprintf (
VEC_CONSTRUCTOR_ERROR
" Partition key columns are not supported with DiskANN-indexed vector columns " ) ;
goto error ;
}
break ;
}
}
2026-03-29 19:44:44 -07:00
sqlite3_str * createStr = sqlite3_str_new ( NULL ) ;
sqlite3_str_appendall ( createStr , " CREATE TABLE x( " ) ;
if ( pkColumnName ) {
sqlite3_str_appendf ( createStr , " \" %.*w \" primary key, " , pkColumnNameLength ,
pkColumnName ) ;
} else {
sqlite3_str_appendall ( createStr , " rowid, " ) ;
2024-07-23 23:57:42 -07:00
}
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns ; i + + ) {
switch ( pNew - > user_column_kinds [ i ] ) {
case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR : {
int vector_idx = pNew - > user_column_idxs [ i ] ;
sqlite3_str_appendf ( createStr , " \" %.*w \" , " ,
pNew - > vector_columns [ vector_idx ] . name_length ,
pNew - > vector_columns [ vector_idx ] . name ) ;
break ;
}
case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION : {
int partition_idx = pNew - > user_column_idxs [ i ] ;
sqlite3_str_appendf ( createStr , " \" %.*w \" , " ,
pNew - > paritition_columns [ partition_idx ] . name_length ,
pNew - > paritition_columns [ partition_idx ] . name ) ;
break ;
}
case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY : {
int auxiliary_idx = pNew - > user_column_idxs [ i ] ;
sqlite3_str_appendf ( createStr , " \" %.*w \" , " ,
pNew - > auxiliary_columns [ auxiliary_idx ] . name_length ,
pNew - > auxiliary_columns [ auxiliary_idx ] . name ) ;
break ;
}
case SQLITE_VEC0_USER_COLUMN_KIND_METADATA : {
int metadata_idx = pNew - > user_column_idxs [ i ] ;
sqlite3_str_appendf ( createStr , " \" %.*w \" , " ,
pNew - > metadata_columns [ metadata_idx ] . name_length ,
pNew - > metadata_columns [ metadata_idx ] . name ) ;
break ;
}
2024-07-23 08:59:34 -07:00
}
}
2026-03-29 19:44:44 -07:00
sqlite3_str_appendall ( createStr , " distance hidden, k hidden) " ) ;
if ( pkColumnName ) {
sqlite3_str_appendall ( createStr , " without rowid " ) ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
zSql = sqlite3_str_finish ( createStr ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_declare_vtab ( db , zSql ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( rc ! = SQLITE_OK ) {
* pzErr = sqlite3_mprintf ( VEC_CONSTRUCTOR_ERROR
" could not declare virtual table, '%s' " ,
sqlite3_errmsg ( db ) ) ;
goto error ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
const char * schemaName = argv [ 1 ] ;
const char * tableName = argv [ 2 ] ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
pNew - > db = db ;
pNew - > pkIsText = pkColumnType = = SQLITE_TEXT ;
pNew - > schemaName = sqlite3_mprintf ( " %s " , schemaName ) ;
if ( ! pNew - > schemaName ) {
goto error ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
pNew - > tableName = sqlite3_mprintf ( " %s " , tableName ) ;
if ( ! pNew - > tableName ) {
goto error ;
2024-07-23 23:57:42 -07:00
}
2026-03-29 19:44:44 -07:00
pNew - > shadowRowidsName = sqlite3_mprintf ( " %s_rowids " , tableName ) ;
if ( ! pNew - > shadowRowidsName ) {
goto error ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
pNew - > shadowChunksName = sqlite3_mprintf ( " %s_chunks " , tableName ) ;
if ( ! pNew - > shadowChunksName ) {
goto error ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
pNew - > numVectorColumns = numVectorColumns ;
pNew - > numPartitionColumns = numPartitionColumns ;
pNew - > numAuxiliaryColumns = numAuxiliaryColumns ;
pNew - > numMetadataColumns = numMetadataColumns ;
2024-07-23 08:59:34 -07:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < pNew - > numVectorColumns ; i + + ) {
pNew - > shadowVectorChunksNames [ i ] =
sqlite3_mprintf ( " %s_vector_chunks%02d " , tableName , i ) ;
if ( ! pNew - > shadowVectorChunksNames [ i ] ) {
goto error ;
}
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
if ( pNew - > vector_columns [ i ] . index_type = = VEC0_INDEX_TYPE_RESCORE ) {
pNew - > shadowRescoreChunksNames [ i ] =
sqlite3_mprintf ( " %s_rescore_chunks%02d " , tableName , i ) ;
if ( ! pNew - > shadowRescoreChunksNames [ i ] ) {
goto error ;
}
pNew - > shadowRescoreVectorsNames [ i ] =
sqlite3_mprintf ( " %s_rescore_vectors%02d " , tableName , i ) ;
if ( ! pNew - > shadowRescoreVectorsNames [ i ] ) {
goto error ;
}
}
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# endif
# if SQLITE_VEC_ENABLE_DISKANN
if ( pNew - > vector_columns [ i ] . index_type = = VEC0_INDEX_TYPE_DISKANN ) {
pNew - > shadowVectorsNames [ i ] =
sqlite3_mprintf ( " %s_vectors%02d " , tableName , i ) ;
if ( ! pNew - > shadowVectorsNames [ i ] ) {
goto error ;
}
pNew - > shadowDiskannNodesNames [ i ] =
sqlite3_mprintf ( " %s_diskann_nodes%02d " , tableName , i ) ;
if ( ! pNew - > shadowDiskannNodesNames [ i ] ) {
goto error ;
}
}
2026-03-29 19:45:54 -07:00
# endif
2024-07-23 08:59:34 -07:00
}
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
for ( int i = 0 ; i < pNew - > numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_IVF ) continue ;
pNew - > shadowIvfCellsNames [ i ] =
sqlite3_mprintf ( " %s_ivf_cells%02d " , tableName , i ) ;
if ( ! pNew - > shadowIvfCellsNames [ i ] ) goto error ;
pNew - > ivfTrainedCache [ i ] = - 1 ; // unknown
}
# endif
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < pNew - > numMetadataColumns ; i + + ) {
pNew - > shadowMetadataChunksNames [ i ] =
sqlite3_mprintf ( " %s_metadatachunks%02d " , tableName , i ) ;
if ( ! pNew - > shadowMetadataChunksNames [ i ] ) {
goto error ;
}
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
pNew - > chunk_size = chunk_size ;
2024-06-28 10:51:49 -07:00
2026-03-29 19:44:44 -07:00
// if xCreate, then create the necessary shadow tables
if ( isCreate ) {
sqlite3_stmt * stmt ;
int rc ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
char * zCreateInfo = sqlite3_mprintf ( " CREATE TABLE " VEC0_SHADOW_INFO_NAME " (key text primary key, value any) " , pNew - > schemaName , pNew - > tableName ) ;
if ( ! zCreateInfo ) {
goto error ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_prepare_v2 ( db , zCreateInfo , - 1 , & stmt , NULL ) ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
sqlite3_free ( ( void * ) zCreateInfo ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
// TODO(IMP)
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf ( " Could not create '_info' shadow table: %s " ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
2024-06-24 23:26:11 -07:00
sqlite3_finalize ( stmt ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
char * zSeedInfo = sqlite3_mprintf (
" INSERT INTO " VEC0_SHADOW_INFO_NAME " (key, value) VALUES "
" (?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) " ,
pNew - > schemaName , pNew - > tableName
) ;
if ( ! zSeedInfo ) {
goto error ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_prepare_v2 ( db , zSeedInfo , - 1 , & stmt , NULL ) ;
sqlite3_free ( ( void * ) zSeedInfo ) ;
if ( rc ! = SQLITE_OK ) {
// TODO(IMP)
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf ( " Could not seed '_info' shadow table: %s " ,
sqlite3_errmsg ( db ) ) ;
goto error ;
2024-06-24 23:26:11 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_bind_text ( stmt , 1 , " CREATE_VERSION " , - 1 , SQLITE_STATIC ) ;
sqlite3_bind_text ( stmt , 2 , SQLITE_VEC_VERSION , - 1 , SQLITE_STATIC ) ;
sqlite3_bind_text ( stmt , 3 , " CREATE_VERSION_MAJOR " , - 1 , SQLITE_STATIC ) ;
sqlite3_bind_int ( stmt , 4 , SQLITE_VEC_VERSION_MAJOR ) ;
sqlite3_bind_text ( stmt , 5 , " CREATE_VERSION_MINOR " , - 1 , SQLITE_STATIC ) ;
sqlite3_bind_int ( stmt , 6 , SQLITE_VEC_VERSION_MINOR ) ;
sqlite3_bind_text ( stmt , 7 , " CREATE_VERSION_PATCH " , - 1 , SQLITE_STATIC ) ;
sqlite3_bind_int ( stmt , 8 , SQLITE_VEC_VERSION_PATCH ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) {
// TODO(IMP)
2024-06-24 23:26:11 -07:00
sqlite3_finalize ( stmt ) ;
2026-03-29 19:44:44 -07:00
* pzErr = sqlite3_mprintf ( " Could not seed '_info' shadow table: %s " ,
sqlite3_errmsg ( db ) ) ;
goto error ;
2024-06-24 23:26:11 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_finalize ( stmt ) ;
2024-06-24 23:26:11 -07:00
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# if SQLITE_VEC_ENABLE_DISKANN
// Seed medoid entries for DiskANN-indexed columns
for ( int i = 0 ; i < pNew - > numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_DISKANN ) {
continue ;
}
char * key = sqlite3_mprintf ( " diskann_medoid_%02d " , i ) ;
char * zInsert = sqlite3_mprintf (
" INSERT INTO " VEC0_SHADOW_INFO_NAME " (key, value) VALUES (?1, ?2) " ,
pNew - > schemaName , pNew - > tableName ) ;
rc = sqlite3_prepare_v2 ( db , zInsert , - 1 , & stmt , NULL ) ;
sqlite3_free ( zInsert ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_free ( key ) ;
sqlite3_finalize ( stmt ) ;
goto error ;
}
sqlite3_bind_text ( stmt , 1 , key , - 1 , sqlite3_free ) ;
sqlite3_bind_null ( stmt , 2 ) ; // NULL means empty graph
if ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) {
sqlite3_finalize ( stmt ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
}
# endif
2026-03-29 19:44:44 -07:00
// create the _chunks shadow table
char * zCreateShadowChunks = NULL ;
if ( pNew - > numPartitionColumns ) {
sqlite3_str * s = sqlite3_str_new ( NULL ) ;
sqlite3_str_appendf ( s , " CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME " ( " , pNew - > schemaName , pNew - > tableName ) ;
sqlite3_str_appendall ( s , " chunk_id INTEGER PRIMARY KEY AUTOINCREMENT, " " size INTEGER NOT NULL, " ) ;
sqlite3_str_appendall ( s , " sequence_id integer, " ) ;
for ( int i = 0 ; i < pNew - > numPartitionColumns ; i + + ) {
sqlite3_str_appendf ( s , " partition%02d, " , i ) ;
}
sqlite3_str_appendall ( s , " validity BLOB NOT NULL, rowids BLOB NOT NULL); " ) ;
zCreateShadowChunks = sqlite3_str_finish ( s ) ;
} else {
zCreateShadowChunks = sqlite3_mprintf ( VEC0_SHADOW_CHUNKS_CREATE ,
pNew - > schemaName , pNew - > tableName ) ;
2024-06-24 23:26:11 -07:00
}
2026-03-29 19:44:44 -07:00
if ( ! zCreateShadowChunks ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zCreateShadowChunks , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zCreateShadowChunks ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
// IMP: V17740_01811
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf ( " Could not create '_chunks' shadow table: %s " ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// create the _rowids shadow table
char * zCreateShadowRowids ;
if ( pNew - > pkIsText ) {
// adds a "text unique not null" constraint to the id column
zCreateShadowRowids = sqlite3_mprintf ( VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT ,
pNew - > schemaName , pNew - > tableName ) ;
} else {
zCreateShadowRowids = sqlite3_mprintf ( VEC0_SHADOW_ROWIDS_CREATE_BASIC ,
pNew - > schemaName , pNew - > tableName ) ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
if ( ! zCreateShadowRowids ) {
goto error ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_prepare_v2 ( db , zCreateShadowRowids , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zCreateShadowRowids ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
// IMP: V11631_28470
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf ( " Could not create '_rowids' shadow table: %s " ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < pNew - > numVectorColumns ; i + + ) {
2026-03-31 13:51:08 -07:00
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
2026-03-29 19:46:23 -07:00
if ( pNew - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_FLAT )
2026-03-29 19:45:54 -07:00
continue ;
2026-03-29 19:44:44 -07:00
char * zSql = sqlite3_mprintf ( VEC0_SHADOW_VECTOR_N_CREATE ,
pNew - > schemaName , pNew - > tableName , i ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
// IMP: V25919_09989
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create '_vector_chunks%02d' shadow table: %s " , i ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
2024-11-20 00:59:34 -08:00
sqlite3_finalize ( stmt ) ;
}
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_create_tables ( pNew , db , pzErr ) ;
if ( rc ! = SQLITE_OK ) {
goto error ;
}
# endif
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// Create IVF shadow tables for IVF-indexed vector columns
for ( int i = 0 ; i < pNew - > numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_IVF ) continue ;
rc = ivf_create_shadow_tables ( pNew , i ) ;
if ( rc ! = SQLITE_OK ) {
* pzErr = sqlite3_mprintf ( " Could not create IVF shadow tables for column %d " , i ) ;
goto error ;
}
}
# endif
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# if SQLITE_VEC_ENABLE_DISKANN
// Create DiskANN shadow tables for indexed vector columns
for ( int i = 0 ; i < pNew - > numVectorColumns ; i + + ) {
if ( pNew - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_DISKANN ) {
continue ;
}
// Create _vectors{NN} table
{
char * zSql = sqlite3_mprintf (
" CREATE TABLE " VEC0_SHADOW_VECTORS_N_NAME
" (rowid INTEGER PRIMARY KEY, vector BLOB NOT NULL); " ,
pNew - > schemaName , pNew - > tableName , i ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create '_vectors%02d' shadow table: %s " , i ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
}
// Create _diskann_nodes{NN} table
{
char * zSql = sqlite3_mprintf (
" CREATE TABLE " VEC0_SHADOW_DISKANN_NODES_N_NAME " ( "
" rowid INTEGER PRIMARY KEY, "
" neighbors_validity BLOB NOT NULL, "
" neighbor_ids BLOB NOT NULL, "
" neighbor_quantized_vectors BLOB NOT NULL "
" ); " ,
pNew - > schemaName , pNew - > tableName , i ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create '_diskann_nodes%02d' shadow table: %s " , i ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
}
// Create _diskann_buffer{NN} table (for batched inserts)
{
char * zSql = sqlite3_mprintf (
" CREATE TABLE " VEC0_SHADOW_DISKANN_BUFFER_N_NAME " ( "
" rowid INTEGER PRIMARY KEY, "
" vector BLOB NOT NULL "
" ); " ,
pNew - > schemaName , pNew - > tableName , i ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create '_diskann_buffer%02d' shadow table: %s " , i ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
}
}
# endif
2026-03-29 19:44:44 -07:00
// See SHADOW_TABLE_ROWID_QUIRK in vec0_new_chunk() — same "rowid PRIMARY KEY"
// without INTEGER type issue applies here.
for ( int i = 0 ; i < pNew - > numMetadataColumns ; i + + ) {
char * zSql = sqlite3_mprintf ( " CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME " (rowid PRIMARY KEY, data BLOB NOT NULL); " ,
pNew - > schemaName , pNew - > tableName , i ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create '_metata_chunks%02d' shadow table: %s " , i ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
if ( pNew - > metadata_columns [ i ] . kind = = VEC0_METADATA_COLUMN_KIND_TEXT ) {
char * zSql = sqlite3_mprintf ( " CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid PRIMARY KEY, data TEXT); " ,
pNew - > schemaName , pNew - > tableName , i ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create '_metadatatext%02d' shadow table: %s " , i ,
sqlite3_errmsg ( db ) ) ;
goto error ;
}
sqlite3_finalize ( stmt ) ;
}
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
if ( pNew - > numAuxiliaryColumns > 0 ) {
sqlite3_stmt * stmt ;
sqlite3_str * s = sqlite3_str_new ( NULL ) ;
sqlite3_str_appendf ( s , " CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME " ( rowid integer PRIMARY KEY " , pNew - > schemaName , pNew - > tableName ) ;
for ( int i = 0 ; i < pNew - > numAuxiliaryColumns ; i + + ) {
sqlite3_str_appendf ( s , " , value%02d " , i ) ;
}
sqlite3_str_appendall ( s , " ) " ) ;
char * zSql = sqlite3_str_finish ( s ) ;
if ( ! zSql ) {
goto error ;
}
rc = sqlite3_prepare_v2 ( db , zSql , - 1 , & stmt , NULL ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
sqlite3_finalize ( stmt ) ;
* pzErr = sqlite3_mprintf (
" Could not create auxiliary shadow table: %s " ,
sqlite3_errmsg ( db ) ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
goto error ;
}
sqlite3_finalize ( stmt ) ;
}
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
* ppVtab = ( sqlite3_vtab * ) pNew ;
2024-04-20 13:38:58 -07:00
return SQLITE_OK ;
2026-03-29 19:44:44 -07:00
error :
vec0_free ( pNew ) ;
sqlite3_free ( pNew ) ;
return SQLITE_ERROR ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
/*
 * Create-flavoured constructor for the vec0 module.
 *
 * Hands every argument to vec0_init() untouched and passes `true` as the
 * trailing flag. Whatever vec0_init() returns is returned unchanged.
 *
 * NOTE(review): the trailing flag is presumably vec0_init()'s "isCreate"
 * switch, i.e. the one that makes it build the shadow tables - confirm against
 * vec0_init()'s signature.
 */
static int vec0Create(sqlite3 *db, void *pAux, int argc,
                      const char *const *argv, sqlite3_vtab **ppVtab,
                      char **pzErr) {
  const bool createShadowTables = true;
  return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, createShadowTables);
}
/*
 * Connect-flavoured constructor for the vec0 module.
 *
 * Identical to vec0Create() except that `false` is passed as vec0_init()'s
 * trailing flag. Whatever vec0_init() returns is returned unchanged.
 *
 * NOTE(review): with the flag false, vec0_init() presumably attaches to
 * shadow tables that already exist instead of creating them - confirm against
 * vec0_init().
 */
static int vec0Connect(sqlite3 *db, void *pAux, int argc,
                       const char *const *argv, sqlite3_vtab **ppVtab,
                       char **pzErr) {
  const bool createShadowTables = false;
  return vec0_init(db, pAux, argc, argv, ppVtab, pzErr, createShadowTables);
}
2024-06-28 15:29:13 -07:00
2026-03-29 19:44:44 -07:00
/*
 * Tears down the in-memory side of a vec0 table without touching any shadow
 * table: vec0_free() is run on the vec0_vtab, then the vec0_vtab allocation
 * itself is handed to sqlite3_free(). Always returns SQLITE_OK.
 */
static int vec0Disconnect(sqlite3_vtab *pVtab) {
  vec0_vtab *vtab = (vec0_vtab *)pVtab;

  vec0_free(vtab);
  sqlite3_free(vtab);

  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
static int vec0Destroy ( sqlite3_vtab * pVtab ) {
vec0_vtab * p = ( vec0_vtab * ) pVtab ;
sqlite3_stmt * stmt ;
int rc ;
const char * zSql ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// Free up any sqlite3_stmt, otherwise DROPs on those tables will fail
vec0_free_resources ( p ) ;
2024-06-28 15:29:13 -07:00
2026-03-29 19:44:44 -07:00
// TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of
// provided error
zSql = sqlite3_mprintf ( " DROP TABLE " VEC0_SHADOW_CHUNKS_NAME , p - > schemaName ,
p - > tableName ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
vtab_set_error ( pVtab , " could not drop chunks shadow table " ) ;
goto done ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_finalize ( stmt ) ;
zSql = sqlite3_mprintf ( " DROP TABLE " VEC0_SHADOW_INFO_NAME , p - > schemaName ,
p - > tableName ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
vtab_set_error ( pVtab , " could not drop info shadow table " ) ;
goto done ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_finalize ( stmt ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
zSql = sqlite3_mprintf ( " DROP TABLE " VEC0_SHADOW_ROWIDS_NAME , p - > schemaName ,
p - > tableName ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_finalize ( stmt ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < p - > numVectorColumns ; i + + ) {
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# if SQLITE_VEC_ENABLE_DISKANN
if ( p - > vector_columns [ i ] . index_type = = VEC0_INDEX_TYPE_DISKANN ) {
// Drop DiskANN shadow tables
zSql = sqlite3_mprintf ( " DROP TABLE IF EXISTS " VEC0_SHADOW_VECTORS_N_NAME ,
p - > schemaName , p - > tableName , i ) ;
if ( zSql ) {
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
}
zSql = sqlite3_mprintf ( " DROP TABLE IF EXISTS " VEC0_SHADOW_DISKANN_NODES_N_NAME ,
p - > schemaName , p - > tableName , i ) ;
if ( zSql ) {
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
}
zSql = sqlite3_mprintf ( " DROP TABLE IF EXISTS " VEC0_SHADOW_DISKANN_BUFFER_N_NAME ,
p - > schemaName , p - > tableName , i ) ;
if ( zSql ) {
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
}
continue ;
}
# endif
2026-03-31 13:51:08 -07:00
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
2026-03-29 19:46:23 -07:00
if ( p - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_FLAT )
2026-03-29 19:45:54 -07:00
continue ;
2026-03-29 19:44:44 -07:00
zSql = sqlite3_mprintf ( " DROP TABLE \" %w \" . \" %w \" " , p - > schemaName ,
p - > shadowVectorChunksNames [ i ] ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_drop_tables ( p ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
}
# endif
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// Drop IVF shadow tables
for ( int i = 0 ; i < p - > numVectorColumns ; i + + ) {
if ( p - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_IVF ) continue ;
ivf_drop_shadow_tables ( p , i ) ;
}
# endif
2026-03-29 19:44:44 -07:00
if ( p - > numAuxiliaryColumns > 0 ) {
zSql = sqlite3_mprintf ( " DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME , p - > schemaName , p - > tableName ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
}
2024-11-20 00:02:04 -08:00
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < p - > numMetadataColumns ; i + + ) {
zSql = sqlite3_mprintf ( " DROP TABLE " VEC0_SHADOW_METADATA_N_NAME , p - > schemaName , p - > tableName , i ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( p - > metadata_columns [ i ] . kind = = VEC0_METADATA_COLUMN_KIND_TEXT ) {
zSql = sqlite3_mprintf ( " DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME , p - > schemaName , p - > tableName , i ) ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , 0 ) ;
sqlite3_free ( ( void * ) zSql ) ;
if ( ( rc ! = SQLITE_OK ) | | ( sqlite3_step ( stmt ) ! = SQLITE_DONE ) ) {
rc = SQLITE_ERROR ;
goto done ;
}
sqlite3_finalize ( stmt ) ;
}
2024-10-11 09:09:32 -07:00
}
2026-03-29 19:44:44 -07:00
stmt = NULL ;
rc = SQLITE_OK ;
done :
sqlite3_finalize ( stmt ) ;
vec0_free ( p ) ;
// If there was an error
if ( rc = = SQLITE_OK ) {
sqlite3_free ( p ) ;
2024-10-11 09:09:32 -07:00
}
2026-03-29 19:44:44 -07:00
return rc ;
2024-10-11 09:09:32 -07:00
}
2026-03-29 19:44:44 -07:00
/*
 * Allocates a zero-filled vec0_cursor and publishes its embedded `base`
 * member through *ppCursor.
 *
 * p is not consulted. Returns SQLITE_NOMEM when the allocation fails (in which
 * case *ppCursor is left untouched) and SQLITE_OK otherwise.
 */
static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  UNUSED_PARAMETER(p);

  vec0_cursor *cursor = sqlite3_malloc(sizeof(*cursor));
  if (!cursor) {
    return SQLITE_NOMEM;
  }

  // Start from an all-zero cursor.
  memset(cursor, 0, sizeof(*cursor));
  *ppCursor = &cursor->base;
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/*
 * Disposes of a cursor: vec0_cursor_clear() is run on it first, then the
 * cursor allocation is handed to sqlite3_free(). Always returns SQLITE_OK.
 */
static int vec0Close(sqlite3_vtab_cursor *cur) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  vec0_cursor_clear(cursor);
  sqlite3_free(cursor);

  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// Tags for the kinds of values handed to vec0Filter through argc/argv. Each
// enumerator names the use and purpose of one such value, and is spelled as a
// single printable character.
//
// If any values are updated, please update the ARCHITECTURE.md docs
// accordingly!
//
// The "PARTITON" spelling in the identifier below is part of the existing
// name and is kept as-is.
typedef enum {
  // ~~~ KNN QUERIES ~~~ //

  VEC0_IDXSTR_KIND_KNN_MATCH = '{',
  VEC0_IDXSTR_KIND_KNN_K = '}',
  VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',

  // argv[i] is a constraint on a PARTITION KEY column in a KNN query
  VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',

  // argv[i] is a constraint on the distance column in a KNN query
  VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT = '*',

  // ~~~ POINT QUERIES ~~~ //

  VEC0_IDXSTR_KIND_POINT_ID = '!',

  // ~~~ ??? ~~~ //
  // NOTE(review): the original grouping label for this entry is "???";
  // confirm which query plans actually produce metadata constraints.
  VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
} vec0_idxstr_kind;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// Comparison operators that vec0 supports on partition key columns. Each one
// stands in for a SQLITE_INDEX_CONSTRAINT value, written as a single character
// so that it fits nicely in idxstr.
//
// If any values are updated, please update the ARCHITECTURE.md docs
// accordingly!
typedef enum {
  // "Equal to", ex `user_id = 123`
  VEC0_PARTITION_OPERATOR_EQ = 'a',

  // "Greater than", ex `year > 2024`
  VEC0_PARTITION_OPERATOR_GT = 'b',

  // "Less than or equal to", ex `year <= 2024`
  VEC0_PARTITION_OPERATOR_LE = 'c',

  // "Less than", ex `year < 2024`
  VEC0_PARTITION_OPERATOR_LT = 'd',

  // "Greater than or equal to", ex `year >= 2024`
  VEC0_PARTITION_OPERATOR_GE = 'e',

  // "Not equal to", ex `year != 2024`
  VEC0_PARTITION_OPERATOR_NE = 'f',
} vec0_partition_operator;
// Single-character operator codes for metadata constraints.
//
// The first six codes use the same letters as the same-named entries of
// vec0_partition_operator ('a' through 'f'); IN is the extra seventh code.
//
// NOTE(review): presumably these are written into idxstr for constraints on
// metadata columns - confirm against the code that emits and consumes them.
typedef enum {
  VEC0_METADATA_OPERATOR_EQ = 'a', // equal
  VEC0_METADATA_OPERATOR_GT = 'b', // greater than
  VEC0_METADATA_OPERATOR_LE = 'c', // less than or equal
  VEC0_METADATA_OPERATOR_LT = 'd', // less than
  VEC0_METADATA_OPERATOR_GE = 'e', // greater than or equal
  VEC0_METADATA_OPERATOR_NE = 'f', // not equal
  VEC0_METADATA_OPERATOR_IN = 'g', // membership (IN)
} vec0_metadata_operator;
2024-06-24 23:26:11 -07:00
2024-11-20 00:30:23 -08:00
2026-03-29 19:44:44 -07:00
// Single-character operator codes for constraints on the distance column.
//
// Careful: the letters here do NOT line up with vec0_partition_operator or
// vec0_metadata_operator. In this enum 'a' is GT and 'b' is GE, whereas in
// those enums 'a' is EQ and 'b' is GT.
//
// NOTE(review): presumably paired with VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT
// in idxstr - confirm against the code that emits and consumes them.
typedef enum {
  VEC0_DISTANCE_CONSTRAINT_GT = 'a', // greater than
  VEC0_DISTANCE_CONSTRAINT_GE = 'b', // greater than or equal
  VEC0_DISTANCE_CONSTRAINT_LT = 'c', // less than
  VEC0_DISTANCE_CONSTRAINT_LE = 'd', // less than or equal
} vec0_distance_constraint_operator;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief xBestIndex implementation for vec0: picks a query plan and encodes it
 * into idxStr for xFilter.
 *
 * The produced idxStr starts with one query-plan character
 * (VEC0_QUERY_PLAN_KNN / _POINT / _FULLSCAN) followed by zero or more
 * 4-character elements. Each element corresponds, in order, to one argv value
 * handed to xFilter (argvIndex 1, 2, ...): the first character is a
 * vec0_idxstr_kind, the remaining three carry per-kind details padded with '_'.
 *
 * @param pVTab the vec0 virtual table
 * @param pIdxInfo constraint / order-by description supplied by SQLite; its
 *        aConstraintUsage, idxNum, idxStr, estimatedCost and estimatedRows
 *        members are filled in here
 * @return SQLITE_OK on success, SQLITE_ERROR (with the vtab error message set)
 *         on an unsupported query shape, SQLITE_NOMEM if idxStr could not be
 *         built
 */
static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
  vec0_vtab *p = (vec0_vtab *)pVTab;
  /**
   * Possible query plans are:
   * 1. KNN when:
   *    a) An `MATCH` op on vector column
   *    b) ORDER BY on distance column
   *    c) LIMIT
   *    d) rowid in (...) OPTIONAL
   * 2. Point when:
   *    a) An `EQ` op on rowid column
   * 3. else: fullscan
   *
   */
  int iMatchTerm = -1;
  int iMatchVectorTerm = -1;
  int iLimitTerm = -1;
  int iRowidTerm = -1;
  int iKTerm = -1;
  int iRowidInTerm = -1;
  int hasAuxConstraint = 0;

#ifdef SQLITE_VEC_DEBUG
  printf(" pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d \n ", pIdxInfo->nOrderBy, pIdxInfo->nConstraint);
#endif

  // First pass: classify the usable constraints that decide the query plan.
  for (int i = 0; i < pIdxInfo->nConstraint; i++) {
    u8 vtabIn = 0;

#if COMPILER_SUPPORTS_VTAB_IN
    if (sqlite3_libversion_number() >= 3038000) {
      // A negative third argument only queries whether this is an IN(...) term.
      vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
    }
#endif

#ifdef SQLITE_VEC_DEBUG
    printf(" xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d \n ", i,
           pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn,
           pIdxInfo->aConstraint[i].op, vtabIn);
#endif
    if (!pIdxInfo->aConstraint[i].usable)
      continue;

    int iColumn = pIdxInfo->aConstraint[i].iColumn;
    int op = pIdxInfo->aConstraint[i].op;
    if (op == SQLITE_INDEX_CONSTRAINT_LIMIT) {
      iLimitTerm = i;
    }
    if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
        vec0_column_idx_is_vector(p, iColumn)) {
      if (iMatchTerm > -1) {
        // idxStr has not been allocated yet, so a plain return is safe here.
        vtab_set_error(
            pVTab, " only 1 MATCH operator is allowed in a single vec0 query ");
        return SQLITE_ERROR;
      }
      iMatchTerm = i;
      iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn);
    }
    if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == VEC0_COLUMN_ID) {
      if (vtabIn) {
        if (iRowidInTerm != -1) {
          vtab_set_error(pVTab, " only 1 'rowid in (..)' operator is allowed in "
                                " a single vec0 query ");
          return SQLITE_ERROR;
        }
        iRowidInTerm = i;
      } else {
        iRowidTerm = i;
      }
    }
    if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == vec0_column_k_idx(p)) {
      iKTerm = i;
    }
    if ((op != SQLITE_INDEX_CONSTRAINT_LIMIT && op != SQLITE_INDEX_CONSTRAINT_OFFSET) &&
        vec0_column_idx_is_auxiliary(p, iColumn)) {
      hasAuxConstraint = 1;
    }
  }

  sqlite3_str *idxStr = sqlite3_str_new(NULL);
  int rc;

  if (iMatchTerm >= 0) {
    if (iLimitTerm < 0 && iKTerm < 0) {
      vtab_set_error(
          pVTab,
          " A LIMIT or 'k = ?' constraint is required on vec0 knn queries. ");
      rc = SQLITE_ERROR;
      goto done;
    }
    if (iLimitTerm >= 0 && iKTerm >= 0) {
      vtab_set_error(pVTab, " Only LIMIT or 'k =?' can be provided, not both ");
      rc = SQLITE_ERROR;
      goto done;
    }

    if (pIdxInfo->nOrderBy) {
      if (pIdxInfo->nOrderBy > 1) {
        vtab_set_error(pVTab, " Only a single 'ORDER BY distance' clause is "
                              " allowed on vec0 KNN queries ");
        rc = SQLITE_ERROR;
        goto done;
      }
      if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) {
        vtab_set_error(pVTab,
                       " Only a single 'ORDER BY distance' clause is allowed on "
                       " vec0 KNN queries, not on other columns ");
        rc = SQLITE_ERROR;
        goto done;
      }
      if (pIdxInfo->aOrderBy[0].desc) {
        vtab_set_error(
            pVTab, " Only ascending in ORDER BY distance clause is supported, "
                   " DESC is not supported yet. ");
        rc = SQLITE_ERROR;
        goto done;
      }
    }

    if (hasAuxConstraint) {
      // IMP: V25623_09693
      vtab_set_error(pVTab, " An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query. ");
      rc = SQLITE_ERROR;
      goto done;
    }

    sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN);

    // argvIndex values are handed out in the same order the 4-char elements
    // are appended, so element N of idxStr describes argv[N] in xFilter.
    int argvIndex = 1;
    pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++;
    pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
    sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH);
    sqlite3_str_appendchar(idxStr, 3, '_');

    if (iLimitTerm >= 0) {
      pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++;
      pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
    } else {
      pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++;
      pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
    }
    sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K);
    sqlite3_str_appendchar(idxStr, 3, '_');

#if COMPILER_SUPPORTS_VTAB_IN
    if (iRowidInTerm >= 0) {
      // already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
      // vtabIn == 1
      sqlite3_vtab_in(pIdxInfo, iRowidInTerm, 1);
      pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++;
      pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
      sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN);
      sqlite3_str_appendchar(idxStr, 3, '_');
    }
#endif

    // find any PARTITION KEY column constraints
    for (int i = 0; i < pIdxInfo->nConstraint; i++) {
      if (!pIdxInfo->aConstraint[i].usable)
        continue;

      int iColumn = pIdxInfo->aConstraint[i].iColumn;
      int op = pIdxInfo->aConstraint[i].op;
      if (op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
        continue;
      }
      if (!vec0_column_idx_is_partition(p, iColumn)) {
        continue;
      }

      int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn);
      char value = 0;

      switch (op) {
      case SQLITE_INDEX_CONSTRAINT_EQ: {
        value = VEC0_PARTITION_OPERATOR_EQ;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_GT: {
        value = VEC0_PARTITION_OPERATOR_GT;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_LE: {
        value = VEC0_PARTITION_OPERATOR_LE;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_LT: {
        value = VEC0_PARTITION_OPERATOR_LT;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_GE: {
        value = VEC0_PARTITION_OPERATOR_GE;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_NE: {
        value = VEC0_PARTITION_OPERATOR_NE;
        break;
      }
      default: {
        // Any other operator is left unclaimed (value stays 0).
        break;
      }
      }

      if (value) {
        pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
        pIdxInfo->aConstraintUsage[i].omit = 1;
        sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT);
        sqlite3_str_appendchar(idxStr, 1, 'A' + partition_idx);
        sqlite3_str_appendchar(idxStr, 1, value);
        sqlite3_str_appendchar(idxStr, 1, '_');
      }
    }

    // find any metadata column constraints
    for (int i = 0; i < pIdxInfo->nConstraint; i++) {
      if (!pIdxInfo->aConstraint[i].usable)
        continue;

      int iColumn = pIdxInfo->aConstraint[i].iColumn;
      int op = pIdxInfo->aConstraint[i].op;
      if (op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
        continue;
      }
      if (!vec0_column_idx_is_metadata(p, iColumn)) {
        continue;
      }

      int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn);
      char value = 0;

      switch (op) {
      case SQLITE_INDEX_CONSTRAINT_EQ: {
        // Plain equality unless SQLite reports this term as `xxx in (...)`.
        value = VEC0_METADATA_OPERATOR_EQ;
#if COMPILER_SUPPORTS_VTAB_IN
        if (sqlite3_libversion_number() >= 3038000 &&
            sqlite3_vtab_in(pIdxInfo, i, -1)) {
          switch (p->metadata_columns[metadata_idx].kind) {
          case VEC0_METADATA_COLUMN_KIND_FLOAT:
          case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
            // IMP: V15248_32086
            rc = SQLITE_ERROR;
            vtab_set_error(pVTab, " 'xxx in (...)' is only available on INTEGER or TEXT metadata columns. ");
            goto done;
          }
          case VEC0_METADATA_COLUMN_KIND_INTEGER:
          case VEC0_METADATA_COLUMN_KIND_TEXT: {
            break;
          }
          }
          value = VEC0_METADATA_OPERATOR_IN;
          // Ask SQLite to pass all IN(...) values to xFilter at once.
          sqlite3_vtab_in(pIdxInfo, i, 1);
        }
#endif
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_GT: {
        value = VEC0_METADATA_OPERATOR_GT;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_LE: {
        value = VEC0_METADATA_OPERATOR_LE;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_LT: {
        value = VEC0_METADATA_OPERATOR_LT;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_GE: {
        value = VEC0_METADATA_OPERATOR_GE;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_NE: {
        value = VEC0_METADATA_OPERATOR_NE;
        break;
      }
      default: {
        // IMP: V16511_00582
        rc = SQLITE_ERROR;
        vtab_set_error(pVTab,
                       " An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. "
                       " Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed. "
        );
        goto done;
      }
      }

      if (p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
        if (!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) {
          // IMP: V10145_26984
          rc = SQLITE_ERROR;
          vtab_set_error(pVTab, " ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns. ");
          goto done;
        }
      }

      pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
      pIdxInfo->aConstraintUsage[i].omit = 1;
      sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT);
      sqlite3_str_appendchar(idxStr, 1, 'A' + metadata_idx);
      sqlite3_str_appendchar(idxStr, 1, value);
      sqlite3_str_appendchar(idxStr, 1, '_');
    }

    // find any distance column constraints
    for (int i = 0; i < pIdxInfo->nConstraint; i++) {
      if (!pIdxInfo->aConstraint[i].usable)
        continue;

      int iColumn = pIdxInfo->aConstraint[i].iColumn;
      int op = pIdxInfo->aConstraint[i].op;
      if (op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
        continue;
      }
      if (vec0_column_distance_idx(p) != iColumn) {
        continue;
      }

      char value = 0;
      switch (op) {
      case SQLITE_INDEX_CONSTRAINT_GT: {
        value = VEC0_DISTANCE_CONSTRAINT_GT;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_GE: {
        value = VEC0_DISTANCE_CONSTRAINT_GE;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_LT: {
        value = VEC0_DISTANCE_CONSTRAINT_LT;
        break;
      }
      case SQLITE_INDEX_CONSTRAINT_LE: {
        value = VEC0_DISTANCE_CONSTRAINT_LE;
        break;
      }
      default: {
        // IMP TODO
        rc = SQLITE_ERROR;
        vtab_set_error(
            pVTab,
            " Illegal WHERE constraint on distance column in a KNN query. "
            " Only one of GT, GE, LT, LE constraints are allowed. "
        );
        goto done;
      }
      }

      pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
      pIdxInfo->aConstraintUsage[i].omit = 1;
      sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT);
      sqlite3_str_appendchar(idxStr, 1, value);
      sqlite3_str_appendchar(idxStr, 1, '_');
      sqlite3_str_appendchar(idxStr, 1, '_');
    }

    pIdxInfo->idxNum = iMatchVectorTerm;
    pIdxInfo->estimatedCost = 30.0;
    pIdxInfo->estimatedRows = 10;
  } else if (iRowidTerm >= 0) {
    sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT);
    pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
    pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
    sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID);
    sqlite3_str_appendchar(idxStr, 3, '_');
    pIdxInfo->idxNum = pIdxInfo->colUsed;
    pIdxInfo->estimatedCost = 10.0;
    pIdxInfo->estimatedRows = 1;
  } else {
    sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN);
    pIdxInfo->estimatedCost = 3000000.0;
    pIdxInfo->estimatedRows = 100000;
  }

  pIdxInfo->idxStr = sqlite3_str_finish(idxStr);
  idxStr = NULL;
  if (!pIdxInfo->idxStr) {
    // Every plan appends at least one character, so a NULL result means the
    // string could not be built; report it rather than hand xFilter a NULL
    // idxStr.
    rc = SQLITE_NOMEM;
    goto done;
  }
  pIdxInfo->needToFreeIdxStr = 1;
  rc = SQLITE_OK;

done:
  if (idxStr) {
    // sqlite3_str_finish() hands back a heap buffer; free it so that error
    // paths do not leak the partially built idxStr.
    sqlite3_free(sqlite3_str_finish(idxStr));
  }
  return rc;
}
2024-11-20 00:30:23 -08:00
2026-03-29 19:44:44 -07:00
// forward declaration because vec0Filter uses it
static int vec0Next ( sqlite3_vtab_cursor * cur ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Merge two distance lists into `out`, writing at most `out_length`
 * entries in ascending merge order.
 *
 * `a` is consumed front to back. `b` is consumed indirectly: the ptrB-th
 * element taken from it is `b[b_top_idxs[ptrB]]`. On equal distances the entry
 * from `a` is emitted first.
 *
 * @param a distances of the first list (read in index order)
 * @param a_rowids rowids parallel to `a`
 * @param a_length number of usable entries in `a`
 * @param b distances of the second list
 * @param b_rowids rowids parallel to `b`
 * @param b_top_idxs order in which entries of `b` / `b_rowids` are visited
 * @param b_length number of usable entries in `b_top_idxs`
 * @param out output distances, room for `out_length` entries
 * @param out_rowids output rowids, room for `out_length` entries
 * @param out_length capacity of the output arrays
 * @param out_used receives the number of entries actually written
 */
void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b,
                        i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out,
                        i64 *out_rowids, i64 out_length, i64 *out_used) {
  i64 a_pos = 0;
  i64 b_pos = 0;

  for (int slot = 0; slot < out_length; slot++) {
    int a_left = a_pos < a_length;
    int b_left = b_pos < b_length;

    // Both inputs drained before the output filled up.
    if (!a_left && !b_left) {
      *out_used = slot;
      return;
    }

    // Take from `a` when `b` is drained, or when a's head is not larger.
    if (a_left && (!b_left || a[a_pos] <= b[b_top_idxs[b_pos]])) {
      out[slot] = a[a_pos];
      out_rowids[slot] = a_rowids[a_pos];
      a_pos++;
    } else {
      i32 b_idx = b_top_idxs[b_pos];
      out[slot] = b[b_idx];
      out_rowids[slot] = b_rowids[b_idx];
      b_pos++;
    }
  }

  *out_used = out_length;
}
2024-07-05 12:07:45 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Allocate a bitmap able to hold `n` bits, with every bit cleared.
 *
 * @param n number of bits; asserted to be a multiple of 8
 * @return buffer obtained from sqlite3_malloc(), or NULL if the allocation
 *         failed
 */
u8 *bitmap_new(i32 n) {
  assert(n % 8 == 0);
  u8 *bits = sqlite3_malloc(n * sizeof(u8) / CHAR_BIT);
  if (!bits) {
    return NULL;
  }
  memset(bits, 0, n * sizeof(u8) / CHAR_BIT);
  return bits;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Allocate a bitmap of `n` bits initialised as a copy of `from`.
 *
 * @param n number of bits; asserted to be a multiple of 8
 * @param from source bitmap, read for n / CHAR_BIT bytes
 * @return buffer obtained from sqlite3_malloc(), or NULL if the allocation
 *         failed
 */
u8 *bitmap_new_from(i32 n, u8 *from) {
  assert(n % 8 == 0);
  u8 *bits = sqlite3_malloc(n * sizeof(u8) / CHAR_BIT);
  if (!bits) {
    return NULL;
  }
  memcpy(bits, from, n / CHAR_BIT);
  return bits;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Copy the first `n` bits of `from` into `base`.
 *
 * @param base destination bitmap
 * @param from source bitmap
 * @param n number of bits to copy; asserted to be a multiple of 8
 */
void bitmap_copy(u8 *base, u8 *from, i32 n) {
  assert(n % 8 == 0);
  const i32 nbytes = n / CHAR_BIT;
  memcpy(base, from, nbytes);
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Bitwise-AND `other` into `base`, over the first `n` bits.
 *
 * @param base bitmap updated in place
 * @param other bitmap that is only read
 * @param n number of bits to process; asserted to be a multiple of 8
 */
void bitmap_and_inplace(u8 *base, u8 *other, i32 n) {
  assert((n % 8) == 0);
  const int nbytes = n / CHAR_BIT;
  for (int byte = 0; byte < nbytes; byte++) {
    base[byte] &= other[byte];
  }
}
2026-02-13 06:38:26 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Set or clear a single bit.
 *
 * Bit `position` lives in byte `position / CHAR_BIT`, at bit offset
 * `position % CHAR_BIT` counted from the least significant bit.
 *
 * @param bitmap bitmap to modify
 * @param position zero-based bit index
 * @param value non-zero sets the bit, zero clears it
 */
void bitmap_set(u8 *bitmap, i32 position, int value) {
  u8 *byte = &bitmap[position / CHAR_BIT];
  int mask = 1 << (position % CHAR_BIT);
  if (value) {
    *byte |= mask;
  } else {
    *byte &= ~mask;
  }
}
2026-02-13 06:38:26 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Read a single bit.
 *
 * @param bitmap bitmap to read
 * @param position zero-based bit index
 * @return 1 if the bit is set, 0 otherwise
 */
int bitmap_get(u8 *bitmap, i32 position) {
  u8 byte = bitmap[position / CHAR_BIT];
  int shift = position % CHAR_BIT;
  return (byte >> shift) & 1;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Clear the first `n` bits of `bitmap`.
 *
 * @param bitmap bitmap to modify
 * @param n number of bits to clear; asserted to be a multiple of 8
 */
void bitmap_clear(u8 *bitmap, i32 n) {
  assert((n % 8) == 0);
  const i32 nbytes = n / CHAR_BIT;
  memset(bitmap, 0, nbytes);
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Set the first `n` bits of `bitmap`.
 *
 * @param bitmap bitmap to modify
 * @param n number of bits to set; asserted to be a multiple of 8
 */
void bitmap_fill(u8 *bitmap, i32 n) {
  assert((n % 8) == 0);
  const i32 nbytes = n / CHAR_BIT;
  memset(bitmap, 0xFF, nbytes);
}
2026-02-13 06:38:26 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Finds the (at most) k smallest candidate distances and writes their
 * indices to out, ordered by ascending distance.
 *
 * Only positions whose bit is set in `candidates` are considered.
 *
 * @param distances input f32 array of size n, the items to consider
 * @param n size of the distances array (and number of bits in the bitmaps)
 * @param candidates bitmap of n bits; a set bit marks an eligible position
 * @param out output array of size k, receives at most k element indices
 * @param k size of the output array; asserted to satisfy 0 < k <= n
 * @param bTaken scratch bitmap of n bits; cleared and used by the default
 *        implementation, untouched when SQLITE_VEC_EXPERIMENTAL_MIN_IDX is set
 * @param k_used receives how many entries of out were written
 * @return int always SQLITE_OK
 */
int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k,
            u8 *bTaken, i32 *k_used) {
  assert(k > 0);
  assert(k <= n);

#ifdef SQLITE_VEC_EXPERIMENTAL_MIN_IDX
  // Max-heap variant: O(n log k) single-pass.
  // out[0..heap_size-1] stores indices; heap ordered by distances descending
  // so out[0] is always the index of the LARGEST distance in the top-k.
  (void)bTaken;
  int heap_size = 0;

#define HEAP_SIFT_UP(pos) do { \
    int _c = (pos); \
    while (_c > 0) { \
      int _p = (_c - 1) / 2; \
      if (distances[out[_p]] < distances[out[_c]]) { \
        i32 _tmp = out[_p]; out[_p] = out[_c]; out[_c] = _tmp; \
        _c = _p; \
      } else break; \
    } \
  } while (0)

#define HEAP_SIFT_DOWN(pos, sz) do { \
    int _p = (pos); \
    for (;;) { \
      int _l = 2 * _p + 1, _r = 2 * _p + 2, _largest = _p; \
      if (_l < (sz) && distances[out[_l]] > distances[out[_largest]]) \
        _largest = _l; \
      if (_r < (sz) && distances[out[_r]] > distances[out[_largest]]) \
        _largest = _r; \
      if (_largest == _p) break; \
      i32 _tmp = out[_p]; out[_p] = out[_largest]; out[_largest] = _tmp; \
      _p = _largest; \
    } \
  } while (0)

  for (int i = 0; i < n; i++) {
    if (!bitmap_get(candidates, i))
      continue;
    if (heap_size < k) {
      // Heap not full yet: append and restore the heap property upwards.
      out[heap_size] = i;
      heap_size++;
      HEAP_SIFT_UP(heap_size - 1);
    } else if (distances[i] < distances[out[0]]) {
      // Beats the current worst of the top-k: replace the root.
      out[0] = i;
      HEAP_SIFT_DOWN(0, heap_size);
    }
  }

  // Heapsort to produce ascending order.
  for (int i = heap_size - 1; i > 0; i--) {
    i32 tmp = out[0];
    out[0] = out[i];
    out[i] = tmp;
    HEAP_SIFT_DOWN(0, i);
  }

#undef HEAP_SIFT_UP
#undef HEAP_SIFT_DOWN

  *k_used = heap_size;
  return SQLITE_OK;

#else
  // Original: O(n*k) repeated linear scan with bitmap.
  bitmap_clear(bTaken, n);

  for (int ik = 0; ik < k; ik++) {
    // One pass over every position that is a candidate and not yet emitted.
    // The first eligible position seeds `best`; afterwards `<=` lets a later
    // position with an equal distance replace it.
    int best = -1;
    for (int i = 0; i < n; i++) {
      if (bitmap_get(bTaken, i) || !bitmap_get(candidates, i)) {
        continue;
      }
      if (best < 0 || distances[i] <= distances[best]) {
        best = i;
      }
    }

    // Ran out of eligible positions before filling k slots.
    if (best < 0) {
      *k_used = ik;
      return SQLITE_OK;
    }

    out[ik] = best;
    bitmap_set(bTaken, best, 1);
  }

  *k_used = k;
  return SQLITE_OK;
#endif
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Look up the full text value of a metadata TEXT column for one rowid,
 * from the column's text-data shadow table.
 *
 * The statement is prepared on first use and stored in *stmt so that later
 * calls can re-use it; the caller keeps ownership of it. On success the
 * statement is left positioned on the result row.
 *
 * @param p vec0_vtab
 * @param stmt in/out cached statement; prepared here when *stmt is NULL
 * @param metadata_idx index of the metadata column
 * @param rowid rowid to look up
 * @param n output: byte length of the value
 * @param s output: the value, as returned by sqlite3_column_text() on *stmt.
 *        NOTE(review): per the SQLite docs that pointer only stays valid until
 *        *stmt is stepped, reset or finalized again - confirm callers copy or
 *        consume it before the next lookup.
 * @return SQLITE_OK on success, SQLITE_NOMEM if the SQL text could not be
 *         allocated, the sqlite3_prepare_v2() error code if preparing fails,
 *         SQLITE_ERROR if the step does not yield a row
 */
int vec0_get_metadata_text_long_value(
    vec0_vtab *p,
    sqlite3_stmt **stmt,
    int metadata_idx,
    i64 rowid,
    int *n,
    char **s) {
  if (!(*stmt)) {
    char *zQuery = sqlite3_mprintf(" select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " where rowid = ? ", p->schemaName, p->tableName, metadata_idx);
    if (!zQuery) {
      return SQLITE_NOMEM;
    }
    int prepareRc = sqlite3_prepare_v2(p->db, zQuery, -1, stmt, NULL);
    sqlite3_free(zQuery);
    if (prepareRc != SQLITE_OK) {
      return prepareRc;
    }
  }

  sqlite3_reset(*stmt);
  sqlite3_bind_int64(*stmt, 1, rowid);
  if (sqlite3_step(*stmt) != SQLITE_ROW) {
    return SQLITE_ERROR;
  }

  *s = (char *)sqlite3_column_text(*stmt, 0);
  *n = sqlite3_column_bytes(*stmt, 0);
  return SQLITE_OK;
}
/**
 * @brief Create an "iterator" (sqlite3_stmt) of chunks with the given constraints
 *
 * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be applied
 * as WHERE constraints in the underlying stmt SQL, and any consumer of the stmt
 * can freely step through the stmt with all constraints satisfied.
 *
 * idxStr is read as one leading query-plan character followed by 4-character
 * elements; element i describes argv[i]. All other element kinds are ignored
 * here.
 *
 * @param p - vec0_vtab
 * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values
 * @param argc - number of argv values from xFilter
 * @param argv - array of sqlite3_value from xFilter
 * @param outStmt - output sqlite3_stmt of chunks with all filters applied. If
 *        binding a value fails the statement is finalized and *outStmt is set
 *        to NULL.
 * @return int SQLITE_OK on success, error code otherwise
 */
int vec0_chunks_iter(vec0_vtab *p, const char *idxStr, int argc, sqlite3_value **argv, sqlite3_stmt **outStmt) {
  // always null terminated, enforced by SQLite
  int idxStrLength = strlen(idxStr);
  // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element"
  int numValueEntries = (idxStrLength - 1) / 4;
  assert(argc == numValueEntries);
  // argc is only read by the assert above, which compiles out under NDEBUG.
  UNUSED_PARAMETER(argc);

  int rc;
  sqlite3_str *s = sqlite3_str_new(NULL);
  sqlite3_str_appendf(s, " select chunk_id, validity, rowids "
                         " from " VEC0_SHADOW_CHUNKS_NAME,
                      p->schemaName, p->tableName);

  // Pass 1: emit one `partitionNN <op> ?` term per partition-key element.
  int appendedWhere = 0;
  for (int i = 0; i < numValueEntries; i++) {
    int idx = 1 + (i * 4);
    char kind = idxStr[idx + 0];
    if (kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
      continue;
    }

    int partition_idx = idxStr[idx + 1] - 'A';
    int op = idxStr[idx + 2];
    // idxStr[idx + 3] carries no information, it is just a '_' placeholder
    if (!appendedWhere) {
      sqlite3_str_appendall(s, " WHERE ");
      appendedWhere = 1;
    } else {
      sqlite3_str_appendall(s, " AND ");
    }
    switch (op) {
    case VEC0_PARTITION_OPERATOR_EQ:
      sqlite3_str_appendf(s, " partition%02d = ? ", partition_idx);
      break;
    case VEC0_PARTITION_OPERATOR_GT:
      sqlite3_str_appendf(s, " partition%02d > ? ", partition_idx);
      break;
    case VEC0_PARTITION_OPERATOR_LE:
      sqlite3_str_appendf(s, " partition%02d <= ? ", partition_idx);
      break;
    case VEC0_PARTITION_OPERATOR_LT:
      sqlite3_str_appendf(s, " partition%02d < ? ", partition_idx);
      break;
    case VEC0_PARTITION_OPERATOR_GE:
      sqlite3_str_appendf(s, " partition%02d >= ? ", partition_idx);
      break;
    case VEC0_PARTITION_OPERATOR_NE:
      sqlite3_str_appendf(s, " partition%02d != ? ", partition_idx);
      break;
    default: {
      // Unknown operator character: discard the half-built SQL and bail out.
      char *zSql = sqlite3_str_finish(s);
      sqlite3_free(zSql);
      return SQLITE_ERROR;
    }
    }
  }

  char *zSql = sqlite3_str_finish(s);
  if (!zSql) {
    return SQLITE_NOMEM;
  }

  rc = sqlite3_prepare_v2(p->db, zSql, -1, outStmt, NULL);
  sqlite3_free(zSql);
  if (rc != SQLITE_OK) {
    return rc;
  }

  // Pass 2: bind the argv values in the same order the `?` terms were emitted.
  int bindIdx = 1;
  for (int i = 0; i < numValueEntries; i++) {
    int idx = 1 + (i * 4);
    char kind = idxStr[idx + 0];
    if (kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
      continue;
    }
    rc = sqlite3_bind_value(*outStmt, bindIdx++, argv[i]);
    if (rc != SQLITE_OK) {
      // Do not hand back a statement with unbound parameters.
      sqlite3_finalize(*outStmt);
      *outStmt = NULL;
      return rc;
    }
  }

  return SQLITE_OK;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// A single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now.
struct Vec0MetadataIn {
// index `i` of the `argv[i]` value that the constraint is on
int argv_idx ;
// metadata column index of the constraint, derived from idxStr + argv_idx
int metadata_idx ;
// array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next()
struct Array array ;
} ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// Array elements for `xxx in (...)` values for a text column. Basically just a string.
struct Vec0MetadataInTextEntry {
// length of zString - presumably in bytes, excluding any terminator; TODO confirm against the code that fills it
int n ;
// the text value itself
char * zString ;
} ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
int vec0_metadata_filter_text ( vec0_vtab * p , sqlite3_value * value , const void * buffer , int size , vec0_metadata_operator op , u8 * b , int metadata_idx , int chunk_rowid , struct Array * aMetadataIn , int argv_idx ) {
int rc ;
sqlite3_stmt * stmt = NULL ;
i64 * rowids = NULL ;
sqlite3_blob * rowidsBlob ;
const char * sTarget = ( const char * ) sqlite3_value_text ( value ) ;
int nTarget = sqlite3_value_bytes ( value ) ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
// TODO(perf): only text metadata news the rowids BLOB. Make it so that
// rowids BLOB is re-used when multiple fitlers on text columns,
// ex "name BETWEEN 'a' and 'b'""
rc = sqlite3_blob_open ( p - > db , p - > schemaName , p - > shadowChunksName , " rowids " , chunk_rowid , 0 , & rowidsBlob ) ;
if ( rc ! = SQLITE_OK ) {
return rc ;
}
assert ( sqlite3_blob_bytes ( rowidsBlob ) % sizeof ( i64 ) = = 0 ) ;
assert ( ( sqlite3_blob_bytes ( rowidsBlob ) / sizeof ( i64 ) ) = = size ) ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
rowids = sqlite3_malloc ( sqlite3_blob_bytes ( rowidsBlob ) ) ;
if ( ! rowids ) {
sqlite3_blob_close ( rowidsBlob ) ;
return SQLITE_NOMEM ;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
rc = sqlite3_blob_read ( rowidsBlob , rowids , sqlite3_blob_bytes ( rowidsBlob ) , 0 ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_blob_close ( rowidsBlob ) ;
return rc ;
}
sqlite3_blob_close ( rowidsBlob ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
switch ( op ) {
int nPrefix ;
char * sPrefix ;
char * sFull ;
int nFull ;
u8 * view ;
case VEC0_METADATA_OPERATOR_EQ : {
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
// for EQ the text lengths must match
if ( nPrefix ! = nTarget ) {
bitmap_set ( b , i , 0 ) ;
continue ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
int cmpPrefix = strncmp ( sPrefix , sTarget , min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) ) ;
// for short strings, use the prefix comparison direclty
if ( nPrefix < = VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
bitmap_set ( b , i , cmpPrefix = = 0 ) ;
continue ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
// for EQ on longs strings, the prefix must match
if ( cmpPrefix ) {
bitmap_set ( b , i , 0 ) ;
continue ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
// consult the full string
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
2024-11-20 00:59:34 -08:00
goto done ;
}
2026-03-29 19:44:44 -07:00
if ( nPrefix ! = nFull ) {
2024-11-20 00:59:34 -08:00
rc = SQLITE_ERROR ;
goto done ;
}
2026-03-29 19:44:44 -07:00
bitmap_set ( b , i , strncmp ( sFull , sTarget , nFull ) = = 0 ) ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_NE : {
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
// for NE if text lengths dont match, it never will
if ( nPrefix ! = nTarget ) {
bitmap_set ( b , i , 1 ) ;
continue ;
}
2026-02-13 06:38:26 -08:00
2026-03-29 19:44:44 -07:00
int cmpPrefix = strncmp ( sPrefix , sTarget , min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) ) ;
2026-02-13 06:38:26 -08:00
2026-03-29 19:44:44 -07:00
// for short strings, use the prefix comparison direclty
if ( nPrefix < = VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
bitmap_set ( b , i , cmpPrefix ! = 0 ) ;
continue ;
2026-02-13 06:38:26 -08:00
}
2026-03-29 19:44:44 -07:00
// for NE on longs strings, if prefixes dont match, then long string wont
if ( cmpPrefix ) {
bitmap_set ( b , i , 1 ) ;
continue ;
2026-02-13 06:38:26 -08:00
}
2026-03-29 19:44:44 -07:00
// consult the full string
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
2026-02-13 06:38:26 -08:00
}
2026-03-29 19:44:44 -07:00
if ( nPrefix ! = nFull ) {
2026-02-13 06:38:26 -08:00
rc = SQLITE_ERROR ;
goto done ;
}
2026-03-29 19:44:44 -07:00
bitmap_set ( b , i , strncmp ( sFull , sTarget , nFull ) ! = 0 ) ;
2026-02-13 06:38:26 -08:00
}
2026-03-29 19:44:44 -07:00
break ;
2026-02-13 06:38:26 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_GT : {
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
int cmpPrefix = strncmp ( sPrefix , sTarget , min ( min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) , nTarget ) ) ;
2026-02-13 06:38:26 -08:00
2026-03-29 19:44:44 -07:00
if ( nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
// if prefix match, check which is longer
if ( cmpPrefix = = 0 ) {
bitmap_set ( b , i , nPrefix > nTarget ) ;
}
else {
bitmap_set ( b , i , cmpPrefix > 0 ) ;
}
continue ;
}
// TODO(perf): may not need to compare full text in some cases
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
}
if ( nPrefix ! = nFull ) {
rc = SQLITE_ERROR ;
goto done ;
}
bitmap_set ( b , i , strncmp ( sFull , sTarget , nFull ) > 0 ) ;
}
break ;
}
case VEC0_METADATA_OPERATOR_GE : {
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
int cmpPrefix = strncmp ( sPrefix , sTarget , min ( min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) , nTarget ) ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
if ( nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
// if prefix match, check which is longer
if ( cmpPrefix = = 0 ) {
bitmap_set ( b , i , nPrefix > = nTarget ) ;
}
else {
bitmap_set ( b , i , cmpPrefix > = 0 ) ;
}
continue ;
}
// TODO(perf): may not need to compare full text in some cases
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
}
if ( nPrefix ! = nFull ) {
rc = SQLITE_ERROR ;
goto done ;
}
bitmap_set ( b , i , strncmp ( sFull , sTarget , nFull ) > = 0 ) ;
}
break ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_LE : {
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
int cmpPrefix = strncmp ( sPrefix , sTarget , min ( min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) , nTarget ) ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
// if prefix match, check which is longer
if ( cmpPrefix = = 0 ) {
bitmap_set ( b , i , nPrefix < = nTarget ) ;
}
else {
bitmap_set ( b , i , cmpPrefix < = 0 ) ;
}
continue ;
}
// TODO(perf): may not need to compare full text in some cases
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
}
if ( nPrefix ! = nFull ) {
rc = SQLITE_ERROR ;
goto done ;
}
bitmap_set ( b , i , strncmp ( sFull , sTarget , nFull ) < = 0 ) ;
}
break ;
2024-07-05 12:07:45 -07:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_LT : {
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
int cmpPrefix = strncmp ( sPrefix , sTarget , min ( min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) , nTarget ) ) ;
if ( nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
// if prefix match, check which is longer
if ( cmpPrefix = = 0 ) {
bitmap_set ( b , i , nPrefix < nTarget ) ;
}
else {
bitmap_set ( b , i , cmpPrefix < 0 ) ;
}
continue ;
}
// TODO(perf): may not need to compare full text in some cases
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
}
if ( nPrefix ! = nFull ) {
rc = SQLITE_ERROR ;
goto done ;
}
bitmap_set ( b , i , strncmp ( sFull , sTarget , nFull ) < 0 ) ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
break ;
2024-04-20 13:38:58 -07:00
}
2024-07-05 12:07:45 -07:00
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_IN : {
size_t metadataInIdx = - 1 ;
for ( size_t i = 0 ; i < aMetadataIn - > length ; i + + ) {
struct Vec0MetadataIn * metadataIn = & ( ( ( struct Vec0MetadataIn * ) aMetadataIn - > z ) [ i ] ) ;
if ( metadataIn - > argv_idx = = argv_idx ) {
metadataInIdx = i ;
break ;
}
}
if ( metadataInIdx < 0 ) {
rc = SQLITE_ERROR ;
goto done ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
struct Vec0MetadataIn * metadataIn = & ( ( struct Vec0MetadataIn * ) aMetadataIn - > z ) [ metadataInIdx ] ;
struct Array * aTarget = & ( metadataIn - > array ) ;
2024-06-25 08:52:48 -07:00
2026-03-29 19:44:44 -07:00
int nPrefix ;
char * sPrefix ;
char * sFull ;
int nFull ;
u8 * view ;
for ( int i = 0 ; i < size ; i + + ) {
view = & ( ( u8 * ) buffer ) [ i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ] ;
nPrefix = ( ( int * ) view ) [ 0 ] ;
sPrefix = ( char * ) & view [ 4 ] ;
for ( size_t target_idx = 0 ; target_idx < aTarget - > length ; target_idx + + ) {
struct Vec0MetadataInTextEntry * entry = & ( ( ( struct Vec0MetadataInTextEntry * ) aTarget - > z ) [ target_idx ] ) ;
if ( entry - > n ! = nPrefix ) {
continue ;
}
int cmpPrefix = strncmp ( sPrefix , entry - > zString , min ( nPrefix , VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) ) ;
if ( nPrefix < = VEC0_METADATA_TEXT_VIEW_DATA_LENGTH ) {
if ( cmpPrefix = = 0 ) {
bitmap_set ( b , i , 1 ) ;
break ;
}
continue ;
}
if ( cmpPrefix ) {
continue ;
}
2024-07-05 12:07:45 -07:00
2026-03-29 19:44:44 -07:00
rc = vec0_get_metadata_text_long_value ( p , & stmt , metadata_idx , rowids [ i ] , & nFull , & sFull ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
}
if ( nPrefix ! = nFull ) {
rc = SQLITE_ERROR ;
goto done ;
}
if ( strncmp ( sFull , entry - > zString , nFull ) = = 0 ) {
bitmap_set ( b , i , 1 ) ;
break ;
}
}
}
break ;
}
2024-07-05 12:07:45 -07:00
2026-03-29 19:44:44 -07:00
}
rc = SQLITE_OK ;
2024-07-05 12:07:45 -07:00
2026-03-29 19:44:44 -07:00
done :
sqlite3_finalize ( stmt ) ;
sqlite3_free ( rowids ) ;
return rc ;
2024-07-05 12:07:45 -07:00
2024-07-31 12:56:09 -07:00
}
2024-04-20 13:38:58 -07:00
/**
2026-03-29 19:44:44 -07:00
* @ brief Fill in bitmap of chunk values , whether or not the values match a metadata constraint
2024-11-20 00:02:04 -08:00
*
2026-03-29 19:44:44 -07:00
* @ param p vec0_vtab
* @ param metadata_idx index of the metatadata column to perfrom constraints on
* @ param value sqlite3_value of the constraints value
* @ param blob sqlite3_blob that is already opened on the metdata column ' s shadow chunk table
* @ param chunk_rowid rowid of the chunk to calculate on
* @ param b pre - allocated and zero ' d out bitmap to write results to
* @ param size size of the chunk
2024-11-20 00:02:04 -08:00
* @ return int SQLITE_OK on success , error code otherwise
*/
2026-03-29 19:44:44 -07:00
int vec0_set_metadata_filter_bitmap (
vec0_vtab * p ,
int metadata_idx ,
vec0_metadata_operator op ,
sqlite3_value * value ,
sqlite3_blob * blob ,
i64 chunk_rowid ,
u8 * b ,
int size ,
struct Array * aMetadataIn , int argv_idx ) {
// TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap?
2024-11-20 00:02:04 -08:00
int rc ;
2026-03-29 19:44:44 -07:00
rc = sqlite3_blob_reopen ( blob , chunk_rowid ) ;
if ( rc ! = SQLITE_OK ) {
return rc ;
}
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
vec0_metadata_column_kind kind = p - > metadata_columns [ metadata_idx ] . kind ;
int szMatch = 0 ;
int blobSize = sqlite3_blob_bytes ( blob ) ;
switch ( kind ) {
case VEC0_METADATA_COLUMN_KIND_BOOLEAN : {
szMatch = blobSize = = size / CHAR_BIT ;
2024-11-20 00:02:04 -08:00
break ;
2026-03-29 19:44:44 -07:00
}
case VEC0_METADATA_COLUMN_KIND_INTEGER : {
szMatch = blobSize = = size * sizeof ( i64 ) ;
2024-11-20 00:02:04 -08:00
break ;
2026-03-29 19:44:44 -07:00
}
case VEC0_METADATA_COLUMN_KIND_FLOAT : {
szMatch = blobSize = = size * sizeof ( double ) ;
2024-11-20 00:02:04 -08:00
break ;
2026-03-29 19:44:44 -07:00
}
case VEC0_METADATA_COLUMN_KIND_TEXT : {
szMatch = blobSize = = size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH ;
2024-11-20 00:02:04 -08:00
break ;
}
}
2026-03-29 19:44:44 -07:00
if ( ! szMatch ) {
return SQLITE_ERROR ;
}
void * buffer = sqlite3_malloc ( blobSize ) ;
if ( ! buffer ) {
2024-11-20 00:02:04 -08:00
return SQLITE_NOMEM ;
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_blob_read ( blob , buffer , blobSize , 0 ) ;
2024-11-20 00:02:04 -08:00
if ( rc ! = SQLITE_OK ) {
2026-03-29 19:44:44 -07:00
goto done ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
switch ( kind ) {
case VEC0_METADATA_COLUMN_KIND_BOOLEAN : {
int target = sqlite3_value_int ( value ) ;
if ( ( target & & op = = VEC0_METADATA_OPERATOR_EQ ) | | ( ! target & & op = = VEC0_METADATA_OPERATOR_NE ) ) {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , bitmap_get ( ( u8 * ) buffer , i ) ) ; }
}
else {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , ! bitmap_get ( ( u8 * ) buffer , i ) ) ; }
}
break ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_COLUMN_KIND_INTEGER : {
i64 * array = ( i64 * ) buffer ;
i64 target = sqlite3_value_int64 ( value ) ;
switch ( op ) {
case VEC0_METADATA_OPERATOR_EQ : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] = = target ) ; }
break ;
}
case VEC0_METADATA_OPERATOR_GT : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] > target ) ; }
break ;
}
case VEC0_METADATA_OPERATOR_LE : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] < = target ) ; }
break ;
}
case VEC0_METADATA_OPERATOR_LT : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] < target ) ; }
break ;
}
case VEC0_METADATA_OPERATOR_GE : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] > = target ) ; }
break ;
}
case VEC0_METADATA_OPERATOR_NE : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] ! = target ) ; }
break ;
}
case VEC0_METADATA_OPERATOR_IN : {
int metadataInIdx = - 1 ;
for ( size_t i = 0 ; i < aMetadataIn - > length ; i + + ) {
struct Vec0MetadataIn * metadataIn = & ( ( struct Vec0MetadataIn * ) aMetadataIn - > z ) [ i ] ;
if ( metadataIn - > argv_idx = = argv_idx ) {
metadataInIdx = i ;
break ;
}
}
if ( metadataInIdx < 0 ) {
rc = SQLITE_ERROR ;
goto done ;
}
struct Vec0MetadataIn * metadataIn = & ( ( struct Vec0MetadataIn * ) aMetadataIn - > z ) [ metadataInIdx ] ;
struct Array * aTarget = & ( metadataIn - > array ) ;
2024-11-20 00:02:04 -08:00
2026-03-29 19:44:44 -07:00
for ( int i = 0 ; i < size ; i + + ) {
for ( size_t target_idx = 0 ; target_idx < aTarget - > length ; target_idx + + ) {
if ( ( ( i64 * ) aTarget - > z ) [ target_idx ] = = array [ i ] ) {
bitmap_set ( b , i , 1 ) ;
break ;
}
}
}
break ;
2024-11-20 00:59:34 -08:00
}
}
break ;
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_COLUMN_KIND_FLOAT : {
double * array = ( double * ) buffer ;
double target = sqlite3_value_double ( value ) ;
switch ( op ) {
case VEC0_METADATA_OPERATOR_EQ : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] = = target ) ; }
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_GT : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] > target ) ; }
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_LE : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] < = target ) ; }
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_LT : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] < target ) ; }
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_GE : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] > = target ) ; }
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_NE : {
for ( int i = 0 ; i < size ; i + + ) { bitmap_set ( b , i , array [ i ] ! = target ) ; }
break ;
2024-11-20 00:59:34 -08:00
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_OPERATOR_IN : {
// should never be reached
break ;
2024-11-20 00:59:34 -08:00
}
}
break ;
}
2026-03-29 19:44:44 -07:00
case VEC0_METADATA_COLUMN_KIND_TEXT : {
rc = vec0_metadata_filter_text ( p , value , buffer , size , op , b , metadata_idx , chunk_rowid , aMetadataIn , argv_idx ) ;
if ( rc ! = SQLITE_OK ) {
goto done ;
2024-11-20 00:59:34 -08:00
}
break ;
}
2026-03-29 19:44:44 -07:00
}
done :
sqlite3_free ( buffer ) ;
return rc ;
}
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Brute-force KNN scan over every chunk yielded by stmtChunks.
 *
 * For each chunk: builds a candidate bitmap from the chunk's validity bitmap,
 * narrows it by the optional rowid-IN list, metadata constraints and distance
 * constraints, computes the distance of each remaining candidate to
 * queryVector, and merges the chunk's best candidates into a running top-k.
 *
 * @param p vec0_vtab
 * @param stmtChunks prepared statement; each row supplies the chunk id
 *        (column 0), validity bitmap blob (column 1) and rowids blob (column 2)
 * @param vector_column definition of the vector column being searched
 * @param vectorColumnIdx index of that column, used to pick the shadow table
 * @param arrayRowidsIn optional (may be NULL) array of i64 rowids to restrict
 *        the search to; looked up with bsearch(), so presumably sorted per
 *        `_cmp` — confirm against caller
 * @param aMetadataIn array passed through to vec0_set_metadata_filter_bitmap
 * @param idxStr index string; one 4-character entry per argv element,
 *        starting at offset 1
 * @param argc number of entries in argv
 * @param argv constraint values
 * @param queryVector query vector, same element type as the column
 * @param k number of neighbors requested
 * @param out_topk_rowids receives a k-sized rowid array, owned by the caller
 *        on success
 * @param out_topk_distances receives a k-sized distance array, owned by the
 *        caller on success
 * @param out_used receives the number of populated result entries
 * @return int SQLITE_OK on success, error code otherwise (outputs untouched)
 */
int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
                               struct VectorColumnDefinition *vector_column,
                               int vectorColumnIdx, struct Array *arrayRowidsIn,
                               struct Array *aMetadataIn,
                               const char *idxStr, int argc, sqlite3_value **argv,
                               void *queryVector, i64 k, i64 **out_topk_rowids,
                               f32 **out_topk_distances, i64 *out_used) {
  // for each chunk, get top min(k, chunk_size) rowid + distances to query vec.
  // then reconcile all topk_chunks for a true top k.
  // output only rowids + distances for now

  int rc = SQLITE_OK;
  sqlite3_blob *blobVectors = NULL;

  // Zeroed before the first `goto cleanup`, so the cleanup loop only ever
  // closes NULL or genuinely opened handles.
  sqlite3_blob *metadataBlobs[VEC0_MAX_METADATA_COLUMNS];
  memset(metadataBlobs, 0, sizeof(metadataBlobs));

  void *baseVectors = NULL; // memory: chunk_size * dimensions * element_size

  // OWNED BY CALLER ON SUCCESS
  i64 *topk_rowids = NULL; // memory: k * sizeof(i64)
  // OWNED BY CALLER ON SUCCESS
  f32 *topk_distances = NULL; // memory: k * sizeof(f32)

  i64 *tmp_topk_rowids = NULL;    // memory: k * sizeof(i64)
  f32 *tmp_topk_distances = NULL; // memory: k * sizeof(f32)
  f32 *chunk_distances = NULL;    // memory: chunk_size * sizeof(f32)
  u8 *b = NULL;                   // memory: chunk_size / 8
  u8 *bTaken = NULL;              // memory: chunk_size / 8
  i32 *chunk_topk_idxs = NULL;    // memory: k * sizeof(i32)
  u8 *bmRowids = NULL;            // memory: chunk_size / 8
  u8 *bmMetadata = NULL;          // memory: chunk_size / 8

  i64 k_used = 0;

  topk_rowids = sqlite3_malloc(k * sizeof(i64));
  if (!topk_rowids) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }
  memset(topk_rowids, 0, k * sizeof(i64));

  topk_distances = sqlite3_malloc(k * sizeof(f32));
  if (!topk_distances) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }
  memset(topk_distances, 0, k * sizeof(f32));

  tmp_topk_rowids = sqlite3_malloc(k * sizeof(i64));
  if (!tmp_topk_rowids) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }
  memset(tmp_topk_rowids, 0, k * sizeof(i64));

  tmp_topk_distances = sqlite3_malloc(k * sizeof(f32));
  if (!tmp_topk_distances) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }
  memset(tmp_topk_distances, 0, k * sizeof(f32));

  i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column);
  baseVectors = sqlite3_malloc(baseVectorsSize);
  if (!baseVectors) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  chunk_distances = sqlite3_malloc(p->chunk_size * sizeof(f32));
  if (!chunk_distances) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  b = bitmap_new(p->chunk_size);
  if (!b) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  bTaken = bitmap_new(p->chunk_size);
  if (!bTaken) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  chunk_topk_idxs = sqlite3_malloc(k * sizeof(i32));
  if (!chunk_topk_idxs) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL;
  if (arrayRowidsIn && !bmRowids) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  bmMetadata = bitmap_new(p->chunk_size);
  if (!bmMetadata) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  int idxStrLength = strlen(idxStr);
  int numValueEntries = (idxStrLength - 1) / 4;
  assert(numValueEntries == argc);
  UNUSED_PARAMETER(numValueEntries); // only read by the assert above

  // Scan idxStr once so chunks can skip filter passes that don't apply.
  int hasMetadataFilters = 0;
  int hasDistanceConstraints = 0;
  for (int i = 0; i < argc; i++) {
    int idx = 1 + (i * 4);
    char kind = idxStr[idx + 0];
    if (kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
      hasMetadataFilters = 1;
    } else if (kind == VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
      hasDistanceConstraints = 1;
    }
  }

  while (true) {
    rc = sqlite3_step(stmtChunks);
    if (rc == SQLITE_DONE) {
      break;
    }
    if (rc != SQLITE_ROW) {
      vtab_set_error(&p->base, "chunks iter error");
      rc = SQLITE_ERROR;
      goto cleanup;
    }
    memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
    memset(chunk_topk_idxs, 0, k * sizeof(i32));
    bitmap_clear(b, p->chunk_size);

    i64 chunk_id = sqlite3_column_int64(stmtChunks, 0);
    unsigned char *chunkValidity =
        (unsigned char *)sqlite3_column_blob(stmtChunks, 1);
    i64 validitySize = sqlite3_column_bytes(stmtChunks, 1);
    // Widened to i64 so the %lld format specifiers below receive matching
    // argument types.
    const i64 expectedValiditySize = (i64)(p->chunk_size / CHAR_BIT);
    if (validitySize != expectedValiditySize) {
      // IMP: V05271_22109
      vtab_set_error(
          &p->base,
          "chunk validity size doesn't match - expected %lld, found %lld",
          expectedValiditySize, validitySize);
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
    i64 rowidsSize = sqlite3_column_bytes(stmtChunks, 2);
    const i64 expectedRowidsSize = (i64)(p->chunk_size * sizeof(i64));
    if (rowidsSize != expectedRowidsSize) {
      // IMP: V02796_19635
      vtab_set_error(
          &p->base,
          "chunk rowids size doesn't match - expected %lld, found %lld",
          expectedRowidsSize, rowidsSize);
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    // open the vector chunk blob for the current chunk
    rc = sqlite3_blob_open(p->db, p->schemaName,
                           p->shadowVectorChunksNames[vectorColumnIdx],
                           "vectors", chunk_id, 0, &blobVectors);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, "could not open vectors blob for chunk %lld",
                     chunk_id);
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    i64 currentBaseVectorsSize = sqlite3_blob_bytes(blobVectors);
    i64 expectedBaseVectorsSize =
        p->chunk_size * vector_column_byte_size(*vector_column);
    if (currentBaseVectorsSize != expectedBaseVectorsSize) {
      // IMP: V16465_00535
      vtab_set_error(
          &p->base,
          "vectors blob size doesn't match - expected %lld, found %lld",
          expectedBaseVectorsSize, currentBaseVectorsSize);
      rc = SQLITE_ERROR;
      goto cleanup;
    }
    rc = sqlite3_blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id);
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    // Start from the chunk's validity bitmap, then narrow it down.
    bitmap_copy(b, chunkValidity, p->chunk_size);

    if (arrayRowidsIn) {
      bitmap_clear(bmRowids, p->chunk_size);
      for (int i = 0; i < p->chunk_size; i++) {
        if (!bitmap_get(chunkValidity, i)) {
          continue;
        }
        i64 rowid = chunkRowids[i];
        void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length,
                           sizeof(i64), _cmp);
        bitmap_set(bmRowids, i, in ? 1 : 0);
      }
      bitmap_and_inplace(b, bmRowids, p->chunk_size);
    }

    if (hasMetadataFilters) {
      for (int i = 0; i < argc; i++) {
        int idx = 1 + (i * 4);
        char kind = idxStr[idx + 0];
        if (kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
          continue;
        }
        int metadata_idx = idxStr[idx + 1] - 'A';
        int metadata_op = idxStr[idx + 2];

        // Each metadata column's blob handle is opened once and then
        // re-pointed per chunk by vec0_set_metadata_filter_bitmap().
        if (!metadataBlobs[metadata_idx]) {
          rc = sqlite3_blob_open(p->db, p->schemaName,
                                 p->shadowMetadataChunksNames[metadata_idx],
                                 "data", chunk_id, 0,
                                 &metadataBlobs[metadata_idx]);
          if (rc != SQLITE_OK) {
            // Only report an error when the open actually failed.
            vtab_set_error(&p->base, "Could not open metadata blob");
            goto cleanup;
          }
        }

        bitmap_clear(bmMetadata, p->chunk_size);
        rc = vec0_set_metadata_filter_bitmap(
            p, metadata_idx, metadata_op, argv[i], metadataBlobs[metadata_idx],
            chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i);
        if (rc != SQLITE_OK) {
          vtab_set_error(&p->base, "Could not filter metadata fields");
          goto cleanup;
        }
        bitmap_and_inplace(b, bmMetadata, p->chunk_size);
      }
    }

    // Compute the distance for every surviving candidate in this chunk.
    for (int i = 0; i < p->chunk_size; i++) {
      if (!bitmap_get(b, i)) {
        continue;
      }

      // Defined even if an unexpected element type/metric slips through.
      f32 result = 0.0f;
      switch (vector_column->element_type) {
      case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
        const f32 *base_i =
            ((f32 *)baseVectors) + (i * vector_column->dimensions);
        switch (vector_column->distance_metric) {
        case VEC0_DISTANCE_METRIC_L2: {
          result = distance_l2_sqr_float(base_i, (f32 *)queryVector,
                                         &vector_column->dimensions);
          break;
        }
        case VEC0_DISTANCE_METRIC_L1: {
          result = distance_l1_f32(base_i, (f32 *)queryVector,
                                   &vector_column->dimensions);
          break;
        }
        case VEC0_DISTANCE_METRIC_COSINE: {
          result = distance_cosine_float(base_i, (f32 *)queryVector,
                                         &vector_column->dimensions);
          break;
        }
        }
        break;
      }
      case SQLITE_VEC_ELEMENT_TYPE_INT8: {
        const i8 *base_i =
            ((i8 *)baseVectors) + (i * vector_column->dimensions);
        switch (vector_column->distance_metric) {
        case VEC0_DISTANCE_METRIC_L2: {
          result = distance_l2_sqr_int8(base_i, (i8 *)queryVector,
                                        &vector_column->dimensions);
          break;
        }
        case VEC0_DISTANCE_METRIC_L1: {
          result = distance_l1_int8(base_i, (i8 *)queryVector,
                                    &vector_column->dimensions);
          break;
        }
        case VEC0_DISTANCE_METRIC_COSINE: {
          result = distance_cosine_int8(base_i, (i8 *)queryVector,
                                        &vector_column->dimensions);
          break;
        }
        }
        break;
      }
      case SQLITE_VEC_ELEMENT_TYPE_BIT: {
        const u8 *base_i =
            ((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT));
        result = distance_hamming(base_i, (u8 *)queryVector,
                                  &vector_column->dimensions);
        break;
      }
      }

      chunk_distances[i] = result;
    }

    if (hasDistanceConstraints) {
      for (int i = 0; i < argc; i++) {
        int idx = 1 + (i * 4);
        char kind = idxStr[idx + 0];
        if (kind != VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
          continue;
        }
        // Converted only for distance-constraint arguments, so other argv
        // values (e.g. the query vector) are never coerced to a double.
        // TODO casts f64 to f32, is that a problem?
        f32 target = (f32)sqlite3_value_double(argv[i]);
        vec0_distance_constraint_operator op = idxStr[idx + 1];

        switch (op) {
        case VEC0_DISTANCE_CONSTRAINT_GE: {
          for (int j = 0; j < p->chunk_size; j++) {
            if (bitmap_get(b, j) && !(chunk_distances[j] >= target)) {
              bitmap_set(b, j, 0);
            }
          }
          break;
        }
        case VEC0_DISTANCE_CONSTRAINT_GT: {
          for (int j = 0; j < p->chunk_size; j++) {
            if (bitmap_get(b, j) && !(chunk_distances[j] > target)) {
              bitmap_set(b, j, 0);
            }
          }
          break;
        }
        case VEC0_DISTANCE_CONSTRAINT_LE: {
          for (int j = 0; j < p->chunk_size; j++) {
            if (bitmap_get(b, j) && !(chunk_distances[j] <= target)) {
              bitmap_set(b, j, 0);
            }
          }
          break;
        }
        case VEC0_DISTANCE_CONSTRAINT_LT: {
          for (int j = 0; j < p->chunk_size; j++) {
            if (bitmap_get(b, j) && !(chunk_distances[j] < target)) {
              bitmap_set(b, j, 0);
            }
          }
          break;
        }
        }
      }
    }

    // Pick this chunk's best candidates, then merge them into the running
    // top-k via the tmp_* scratch arrays.
    int used1;
    min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
            min(k, p->chunk_size), bTaken, &used1);

    i64 used;
    merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances,
                       chunkRowids, chunk_topk_idxs,
                       min(min(k, p->chunk_size), used1), tmp_topk_distances,
                       tmp_topk_rowids, k, &used);

    for (int i = 0; i < used; i++) {
      topk_rowids[i] = tmp_topk_rowids[i];
      topk_distances[i] = tmp_topk_distances[i];
    }
    k_used = used;

    // blobVectors is always opened with read-only permissions, so this never
    // fails.
    sqlite3_blob_close(blobVectors);
    blobVectors = NULL;
  }

  *out_topk_rowids = topk_rowids;
  *out_topk_distances = topk_distances;
  *out_used = k_used;
  rc = SQLITE_OK;

cleanup:
  if (rc != SQLITE_OK) {
    // Result arrays are only handed to the caller on success.
    sqlite3_free(topk_rowids);
    sqlite3_free(topk_distances);
  }
  sqlite3_free(chunk_topk_idxs);
  sqlite3_free(tmp_topk_rowids);
  sqlite3_free(tmp_topk_distances);
  sqlite3_free(b);
  sqlite3_free(bTaken);
  sqlite3_free(bmRowids);
  sqlite3_free(baseVectors);
  sqlite3_free(chunk_distances);
  sqlite3_free(bmMetadata);
  for (int i = 0; i < VEC0_MAX_METADATA_COLUMNS; i++) {
    sqlite3_blob_close(metadataBlobs[i]);
  }
  // blobVectors is always opened with read-only permissions, so this never
  // fails.
  sqlite3_blob_close(blobVectors);
  return rc;
}
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
# include "sqlite-vec-rescore.c"
# endif
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# if SQLITE_VEC_ENABLE_DISKANN
/**
 * Handle a KNN query using the DiskANN graph search.
 *
 * Runs diskann_search() for the requested k, then scans the column's DiskANN
 * buffer shadow table and merges any buffered vectors into the result set,
 * and finally sorts the results by ascending distance before attaching them
 * to the cursor.
 *
 * @param pCur cursor that receives the knn_data and query plan on success
 * @param p vec0_vtab
 * @param idxNum index of the vector column being queried
 * @param idxStr index string; one 4-character entry per argv element,
 *        starting at offset 1
 * @param argc number of entries in argv
 * @param argv constraint values (query vector and k are located via idxStr)
 * @return SQLITE_OK on success (including k <= 0, which yields an empty
 *         result), SQLITE_NOMEM on allocation failure, SQLITE_ERROR for an
 *         invalid query vector or an oversized k, or the error code returned
 *         by diskann_search()
 */
static int vec0Filter_knn_diskann(
    vec0_cursor *pCur, vec0_vtab *p, int idxNum,
    const char *idxStr, int argc, sqlite3_value **argv) {
  int rc;
  int vectorColumnIdx = idxNum;
  struct VectorColumnDefinition *vector_column = &p->vector_columns[vectorColumnIdx];
  struct vec0_query_knn_data *knn_data;
  i64 *resultRowids = NULL;
  f32 *resultDistances = NULL;
  int resultCount = 0;
  i64 k;
  // Largest k whose i64 result buffer size still fits the int byte count
  // accepted by sqlite3_malloc() and the int k accepted by diskann_search().
  const i64 kMax = (i64)(INT_MAX / sizeof(i64));

  knn_data = sqlite3_malloc(sizeof(*knn_data));
  if (!knn_data) return SQLITE_NOMEM;
  memset(knn_data, 0, sizeof(*knn_data));

  // Parse query_idx and k_idx from idxStr
  int query_idx = -1;
  int k_idx = -1;
  for (int i = 0; i < argc; i++) {
    if (idxStr[1 + (i * 4)] == VEC0_IDXSTR_KIND_KNN_MATCH) {
      query_idx = i;
    }
    if (idxStr[1 + (i * 4)] == VEC0_IDXSTR_KIND_KNN_K) {
      k_idx = i;
    }
  }
  assert(query_idx >= 0);
  assert(k_idx >= 0);

  // Extract query vector
  void *queryVector;
  size_t dimensions;
  enum VectorElementType elementType;
  vector_cleanup queryVectorCleanup = vector_cleanup_noop;
  char *pzError;
  rc = vector_from_value(argv[query_idx], &queryVector, &dimensions,
                         &elementType, &queryVectorCleanup, &pzError);
  if (rc != SQLITE_OK) {
    // %z hands ownership of pzError to the formatter, which frees it.
    vtab_set_error(&p->base, "Invalid query vector: %z", pzError);
    sqlite3_free(knn_data);
    return SQLITE_ERROR;
  }

  if (elementType != vector_column->element_type ||
      dimensions != vector_column->dimensions) {
    vtab_set_error(&p->base, "Query vector type/dimension mismatch");
    rc = SQLITE_ERROR;
    goto fail;
  }

  k = sqlite3_value_int64(argv[k_idx]);
  if (k <= 0) {
    // Nothing requested: hand back an empty result set.
    knn_data->k = 0;
    knn_data->k_used = 0;
    pCur->knn_data = knn_data;
    pCur->query_plan = VEC0_QUERY_PLAN_KNN;
    queryVectorCleanup(queryVector);
    return SQLITE_OK;
  }
  if (k > kMax) {
    // Without this bound, k * sizeof(i64) and (int)k would be silently
    // truncated, leaving the result buffers smaller than the search expects.
    vtab_set_error(
        &p->base,
        "k value in knn query too large, provided %lld and the limit is %lld",
        k, kMax);
    rc = SQLITE_ERROR;
    goto fail;
  }

  // Run DiskANN search
  resultRowids = sqlite3_malloc((int)(k * sizeof(i64)));
  resultDistances = sqlite3_malloc((int)(k * sizeof(f32)));
  if (!resultRowids || !resultDistances) {
    rc = SQLITE_NOMEM;
    goto fail;
  }

  rc = diskann_search(p, vectorColumnIdx, queryVector, dimensions,
                      elementType, (int)k, 0,
                      resultRowids, resultDistances, &resultCount);
  if (rc != SQLITE_OK) {
    goto fail;
  }

  // Scan _diskann_buffer for any buffered (unflushed) vectors and merge
  // with graph results. This ensures no recall loss for buffered vectors.
  {
    sqlite3_stmt *bufStmt = NULL;
    char *zSql = sqlite3_mprintf(
        "SELECT rowid, vector FROM " VEC0_SHADOW_DISKANN_BUFFER_N_NAME,
        p->schemaName, p->tableName, vectorColumnIdx);
    if (!zSql) {
      rc = SQLITE_NOMEM;
      goto fail;
    }
    int bufRc = sqlite3_prepare_v2(p->db, zSql, -1, &bufStmt, NULL);
    sqlite3_free(zSql);
    // A failed prepare is treated as "no buffered vectors" (best effort).
    if (bufRc == SQLITE_OK) {
      const i64 vectorBytes = (i64)vector_column_byte_size(*vector_column);
      while (sqlite3_step(bufStmt) == SQLITE_ROW) {
        i64 bufRowid = sqlite3_column_int64(bufStmt, 0);
        const void *bufVec = sqlite3_column_blob(bufStmt, 1);
        // Skip rows whose blob is NULL or shorter than one full vector,
        // so the distance function never reads past the stored bytes.
        if (!bufVec || (i64)sqlite3_column_bytes(bufStmt, 1) < vectorBytes) {
          continue;
        }

        f32 dist = vec0_distance_full(
            queryVector, bufVec, dimensions, elementType,
            vector_column->distance_metric);

        // Check if this buffer vector should replace the worst graph result
        if (resultCount < (int)k) {
          // Still have room, just add it
          resultRowids[resultCount] = bufRowid;
          resultDistances[resultCount] = dist;
          resultCount++;
        } else {
          // Find worst (largest distance) in results
          int worstIdx = 0;
          for (int wi = 1; wi < resultCount; wi++) {
            if (resultDistances[wi] > resultDistances[worstIdx]) {
              worstIdx = wi;
            }
          }
          if (dist < resultDistances[worstIdx]) {
            resultRowids[worstIdx] = bufRowid;
            resultDistances[worstIdx] = dist;
          }
        }
      }
      sqlite3_finalize(bufStmt);
    }
  }

  queryVectorCleanup(queryVector);

  // Sort results by distance (ascending)
  for (int si = 0; si < resultCount - 1; si++) {
    for (int sj = si + 1; sj < resultCount; sj++) {
      if (resultDistances[sj] < resultDistances[si]) {
        f32 tmpD = resultDistances[si];
        resultDistances[si] = resultDistances[sj];
        resultDistances[sj] = tmpD;
        i64 tmpR = resultRowids[si];
        resultRowids[si] = resultRowids[sj];
        resultRowids[sj] = tmpR;
      }
    }
  }

  // Ownership of knn_data and both result arrays moves to the cursor.
  knn_data->k = resultCount;
  knn_data->k_used = resultCount;
  knn_data->rowids = resultRowids;
  knn_data->distances = resultDistances;
  knn_data->current_idx = 0;

  pCur->knn_data = knn_data;
  pCur->query_plan = VEC0_QUERY_PLAN_KNN;
  return SQLITE_OK;

fail:
  // Shared error exit once the query vector has been extracted.
  queryVectorCleanup(queryVector);
  sqlite3_free(resultRowids);
  sqlite3_free(resultDistances);
  sqlite3_free(knn_data);
  return rc;
}
# endif /* SQLITE_VEC_ENABLE_DISKANN */
2024-04-20 13:38:58 -07:00
int vec0Filter_knn ( vec0_cursor * pCur , vec0_vtab * p , int idxNum ,
const char * idxStr , int argc , sqlite3_value * * argv ) {
2024-11-20 00:02:04 -08:00
assert ( argc = = ( strlen ( idxStr ) - 1 ) / 4 ) ;
2026-03-29 19:44:44 -07:00
int rc ;
struct vec0_query_knn_data * knn_data ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
int vectorColumnIdx = idxNum ;
struct VectorColumnDefinition * vector_column =
& p - > vector_columns [ vectorColumnIdx ] ;
2024-04-20 13:38:58 -07:00
Add DiskANN index for vec0 virtual table
Add DiskANN graph-based index: builds a Vamana graph with configurable R
(max degree) and L (search list size, separate for insert/query), supports
int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized
query optimization, and insert buffer reuse. Includes shadow table management,
delete support, KNN integration, compile flag (SQLITE_VEC_ENABLE_DISKANN),
release-demo workflow, fuzz targets, and tests. Fixes rescore int8
quantization bug.
2026-03-29 19:46:53 -07:00
# if SQLITE_VEC_ENABLE_DISKANN
// DiskANN dispatch
if ( vector_column - > index_type = = VEC0_INDEX_TYPE_DISKANN ) {
return vec0Filter_knn_diskann ( pCur , p , idxNum , idxStr , argc , argv ) ;
}
# endif
2026-03-29 19:44:44 -07:00
struct Array * arrayRowidsIn = NULL ;
sqlite3_stmt * stmtChunks = NULL ;
void * queryVector ;
size_t dimensions ;
enum VectorElementType elementType ;
vector_cleanup queryVectorCleanup = vector_cleanup_noop ;
char * pzError ;
knn_data = sqlite3_malloc ( sizeof ( * knn_data ) ) ;
if ( ! knn_data ) {
return SQLITE_NOMEM ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
memset ( knn_data , 0 , sizeof ( * knn_data ) ) ;
// array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
struct Array * aMetadataIn = NULL ;
int query_idx = - 1 ;
int k_idx = - 1 ;
int rowid_in_idx = - 1 ;
for ( int i = 0 ; i < argc ; i + + ) {
if ( idxStr [ 1 + ( i * 4 ) ] = = VEC0_IDXSTR_KIND_KNN_MATCH ) {
query_idx = i ;
2024-11-20 00:30:23 -08:00
}
2026-03-29 19:44:44 -07:00
if ( idxStr [ 1 + ( i * 4 ) ] = = VEC0_IDXSTR_KIND_KNN_K ) {
k_idx = i ;
2024-11-20 00:02:04 -08:00
}
2026-03-29 19:44:44 -07:00
if ( idxStr [ 1 + ( i * 4 ) ] = = VEC0_IDXSTR_KIND_KNN_ROWID_IN ) {
rowid_in_idx = i ;
2024-11-20 00:59:34 -08:00
}
}
2026-03-29 19:44:44 -07:00
assert ( query_idx > = 0 ) ;
assert ( k_idx > = 0 ) ;
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
// make sure the query vector matches the vector column (type dimensions etc.)
rc = vector_from_value ( argv [ query_idx ] , & queryVector , & dimensions , & elementType ,
& queryVectorCleanup , & pzError ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( rc ! = SQLITE_OK ) {
vtab_set_error ( & p - > base ,
" Query vector on the \" %.*s \" column is invalid: %z " ,
vector_column - > name_length , vector_column - > name , pzError ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
if ( elementType ! = vector_column - > element_type ) {
vtab_set_error (
& p - > base ,
" Query vector for the \" %.*s \" column is expected to be of type "
" %s, but a %s vector was provided. " ,
vector_column - > name_length , vector_column - > name ,
vector_subtype_name ( vector_column - > element_type ) ,
vector_subtype_name ( elementType ) ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
if ( dimensions ! = vector_column - > dimensions ) {
vtab_set_error (
& p - > base ,
" Dimension mismatch for query vector for the \" %.*s \" column. "
" Expected %d dimensions but received %d. " ,
vector_column - > name_length , vector_column - > name ,
vector_column - > dimensions , dimensions ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
i64 k = sqlite3_value_int64 ( argv [ k_idx ] ) ;
if ( k < 0 ) {
vtab_set_error (
& p - > base , " k value in knn queries must be greater than or equal to 0. " ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
}
# define SQLITE_VEC_VEC0_K_MAX 4096
if ( k > SQLITE_VEC_VEC0_K_MAX ) {
vtab_set_error (
& p - > base ,
" k value in knn query too large, provided %lld and the limit is %lld " ,
k , SQLITE_VEC_VEC0_K_MAX ) ;
rc = SQLITE_ERROR ;
goto cleanup ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
if ( k = = 0 ) {
knn_data - > k = 0 ;
pCur - > knn_data = knn_data ;
pCur - > query_plan = VEC0_QUERY_PLAN_KNN ;
rc = SQLITE_OK ;
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// handle when a `rowid in (...)` operation was provided
// Array of all the rowids that appear in any `rowid in (...)` constraint.
// NULL if none were provided, which means a "full" scan.
# if COMPILER_SUPPORTS_VTAB_IN
if ( rowid_in_idx > = 0 ) {
sqlite3_value * item ;
int rc ;
arrayRowidsIn = sqlite3_malloc ( sizeof ( * arrayRowidsIn ) ) ;
if ( ! arrayRowidsIn ) {
rc = SQLITE_NOMEM ;
goto cleanup ;
}
memset ( arrayRowidsIn , 0 , sizeof ( * arrayRowidsIn ) ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
rc = array_init ( arrayRowidsIn , sizeof ( i64 ) , 32 ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
for ( rc = sqlite3_vtab_in_first ( argv [ rowid_in_idx ] , & item ) ; rc = = SQLITE_OK & & item ;
rc = sqlite3_vtab_in_next ( argv [ rowid_in_idx ] , & item ) ) {
i64 rowid ;
if ( p - > pkIsText ) {
rc = vec0_rowid_from_id ( p , item , & rowid ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
} else {
rowid = sqlite3_value_int64 ( item ) ;
}
rc = array_append ( arrayRowidsIn , & rowid ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
}
if ( rc ! = SQLITE_DONE ) {
vtab_set_error ( & p - > base , " error processing rowid in (...) array " ) ;
goto cleanup ;
}
qsort ( arrayRowidsIn - > z , arrayRowidsIn - > length , arrayRowidsIn - > element_size ,
_cmp ) ;
}
# endif
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
# if COMPILER_SUPPORTS_VTAB_IN
for ( int i = 0 ; i < argc ; i + + ) {
if ( ! ( idxStr [ 1 + ( i * 4 ) ] = = VEC0_IDXSTR_KIND_METADATA_CONSTRAINT & & idxStr [ 1 + ( i * 4 ) + 2 ] = = VEC0_METADATA_OPERATOR_IN ) ) {
continue ;
}
int metadata_idx = idxStr [ 1 + ( i * 4 ) + 1 ] - ' A ' ;
if ( ! aMetadataIn ) {
aMetadataIn = sqlite3_malloc ( sizeof ( * aMetadataIn ) ) ;
if ( ! aMetadataIn ) {
rc = SQLITE_NOMEM ;
goto cleanup ;
}
memset ( aMetadataIn , 0 , sizeof ( * aMetadataIn ) ) ;
rc = array_init ( aMetadataIn , sizeof ( struct Vec0MetadataIn ) , 8 ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
2024-06-23 17:46:42 -07:00
}
2026-03-29 19:44:44 -07:00
struct Vec0MetadataIn item ;
memset ( & item , 0 , sizeof ( item ) ) ;
item . metadata_idx = metadata_idx ;
item . argv_idx = i ;
2024-06-23 17:46:42 -07:00
2026-03-29 19:44:44 -07:00
switch ( p - > metadata_columns [ metadata_idx ] . kind ) {
case VEC0_METADATA_COLUMN_KIND_INTEGER : {
rc = array_init ( & item . array , sizeof ( i64 ) , 16 ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
sqlite3_value * entry ;
for ( rc = sqlite3_vtab_in_first ( argv [ i ] , & entry ) ; rc = = SQLITE_OK & & entry ; rc = sqlite3_vtab_in_next ( argv [ i ] , & entry ) ) {
i64 v = sqlite3_value_int64 ( entry ) ;
rc = array_append ( & item . array , & v ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
}
2024-06-23 17:46:42 -07:00
2026-03-29 19:44:44 -07:00
if ( rc ! = SQLITE_DONE ) {
vtab_set_error ( & p - > base , " Error fetching next value in `x in (...)` integer expression " ) ;
goto cleanup ;
}
2024-06-23 17:46:42 -07:00
2026-03-29 19:44:44 -07:00
break ;
}
case VEC0_METADATA_COLUMN_KIND_TEXT : {
rc = array_init ( & item . array , sizeof ( struct Vec0MetadataInTextEntry ) , 16 ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
sqlite3_value * entry ;
for ( rc = sqlite3_vtab_in_first ( argv [ i ] , & entry ) ; rc = = SQLITE_OK & & entry ; rc = sqlite3_vtab_in_next ( argv [ i ] , & entry ) ) {
const char * s = ( const char * ) sqlite3_value_text ( entry ) ;
int n = sqlite3_value_bytes ( entry ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
struct Vec0MetadataInTextEntry entry ;
entry . zString = sqlite3_mprintf ( " %.*s " , n , s ) ;
if ( ! entry . zString ) {
rc = SQLITE_NOMEM ;
goto cleanup ;
}
entry . n = n ;
rc = array_append ( & item . array , & entry ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( rc ! = SQLITE_DONE ) {
vtab_set_error ( & p - > base , " Error fetching next value in `x in (...)` text expression " ) ;
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
break ;
}
default : {
vtab_set_error ( & p - > base , " Internal sqlite-vec error " ) ;
goto cleanup ;
}
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
rc = array_append ( aMetadataIn , & item ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
2024-06-23 17:46:42 -07:00
}
2026-03-29 19:44:44 -07:00
# endif
2024-04-20 13:38:58 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
// Dispatch to rescore KNN path if this vector column has rescore enabled
if ( vector_column - > index_type = = VEC0_INDEX_TYPE_RESCORE ) {
rc = rescore_knn ( p , pCur , vector_column , vectorColumnIdx , arrayRowidsIn ,
aMetadataIn , idxStr , argc , argv , queryVector , k , knn_data ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
pCur - > knn_data = knn_data ;
pCur - > query_plan = VEC0_QUERY_PLAN_KNN ;
rc = SQLITE_OK ;
goto cleanup ;
}
# endif
2026-03-30 23:17:30 -07:00
# if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
2026-03-29 19:46:23 -07:00
// IVF dispatch: if vector column has IVF, use IVF query instead of chunk scan
if ( vector_column - > index_type = = VEC0_INDEX_TYPE_IVF ) {
rc = ivf_query_knn ( p , vectorColumnIdx , queryVector ,
( int ) vector_column_byte_size ( * vector_column ) , k , knn_data ) ;
if ( rc ! = SQLITE_OK ) {
goto cleanup ;
}
pCur - > knn_data = knn_data ;
pCur - > query_plan = VEC0_QUERY_PLAN_KNN ;
rc = SQLITE_OK ;
goto cleanup ;
}
# endif
2026-03-29 19:44:44 -07:00
rc = vec0_chunks_iter ( p , idxStr , argc , argv , & stmtChunks ) ;
if ( rc ! = SQLITE_OK ) {
// IMP: V06942_23781
vtab_set_error ( & p - > base , " Error preparing stmtChunk: %s " ,
sqlite3_errmsg ( p - > db ) ) ;
2024-06-23 17:46:42 -07:00
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
i64 * topk_rowids = NULL ;
f32 * topk_distances = NULL ;
i64 k_used = 0 ;
rc = vec0Filter_knn_chunks_iter ( p , stmtChunks , vector_column , vectorColumnIdx ,
arrayRowidsIn , aMetadataIn , idxStr , argc , argv , queryVector , k , & topk_rowids ,
& topk_distances , & k_used ) ;
if ( rc ! = SQLITE_OK ) {
2024-06-23 17:46:42 -07:00
goto cleanup ;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
knn_data - > current_idx = 0 ;
knn_data - > k = k ;
knn_data - > rowids = topk_rowids ;
knn_data - > distances = topk_distances ;
knn_data - > k_used = k_used ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
pCur - > knn_data = knn_data ;
pCur - > query_plan = VEC0_QUERY_PLAN_KNN ;
rc = SQLITE_OK ;
2024-06-23 17:46:42 -07:00
2026-03-29 19:44:44 -07:00
cleanup :
sqlite3_finalize ( stmtChunks ) ;
array_cleanup ( arrayRowidsIn ) ;
sqlite3_free ( arrayRowidsIn ) ;
queryVectorCleanup ( queryVector ) ;
if ( aMetadataIn ) {
for ( size_t i = 0 ; i < aMetadataIn - > length ; i + + ) {
struct Vec0MetadataIn * item = & ( ( struct Vec0MetadataIn * ) aMetadataIn - > z ) [ i ] ;
for ( size_t j = 0 ; j < item - > array . length ; j + + ) {
if ( p - > metadata_columns [ item - > metadata_idx ] . kind = = VEC0_METADATA_COLUMN_KIND_TEXT ) {
struct Vec0MetadataInTextEntry entry = ( ( struct Vec0MetadataInTextEntry * ) item - > array . z ) [ j ] ;
sqlite3_free ( entry . zString ) ;
}
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
array_cleanup ( & item - > array ) ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
array_cleanup ( aMetadataIn ) ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_free ( aMetadataIn ) ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
if ( rc ! = SQLITE_OK ) {
sqlite3_free ( knn_data ) ;
}
2024-06-23 17:46:42 -07:00
return rc ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
/**
 * Begin a full-scan query plan on the cursor.
 *
 * Prepares a statement that walks every rowid in the rowids shadow table,
 * ordered by (chunk_id, chunk_offset), and steps it once so the cursor is
 * already positioned on the first row (or flagged as done when the table is
 * empty). On success the cursor takes ownership of the scan state.
 *
 * @param p: virtual table
 * @param pCur: cursor that receives the scan state and query plan
 * @return SQLITE_OK on success, otherwise an SQLite error code; on failure
 * the cursor is left untouched and all scan state is released.
 */
int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
  struct vec0_query_fullscan_data *scan = sqlite3_malloc(sizeof(*scan));
  if (scan == NULL) {
    return SQLITE_NOMEM;
  }
  memset(scan, 0, sizeof(*scan));

  // Assume allocation failure until the SQL text has been built.
  int rc = SQLITE_NOMEM;
  char *sql = sqlite3_mprintf(" SELECT rowid "
                              " FROM " VEC0_SHADOW_ROWIDS_NAME
                              " ORDER by chunk_id, chunk_offset ",
                              p->schemaName, p->tableName);
  if (sql != NULL) {
    rc = sqlite3_prepare_v2(p->db, sql, -1, &scan->rowids_stmt, NULL);
    sqlite3_free(sql);

    if (rc != SQLITE_OK) {
      // IMP: V09901_26739
      vtab_set_error(&p->base, "Error preparing rowid scan: %s",
                     sqlite3_errmsg(p->db));
    } else {
      // First step: SQLITE_ROW means at least one rowid exists, SQLITE_DONE
      // means the table is empty. Both count as success.
      rc = sqlite3_step(scan->rowids_stmt);
      if (rc == SQLITE_ROW || rc == SQLITE_DONE) {
        scan->done = (rc == SQLITE_DONE);
        pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN;
        pCur->fullscan_data = scan;
        return SQLITE_OK;
      }
    }
  }

  // Failure: release whatever the scan state holds, then the state itself.
  vec0_query_fullscan_data_clear(scan);
  sqlite3_free(scan);
  return rc;
}
2024-06-23 17:46:42 -07:00
2026-03-29 19:44:44 -07:00
/**
 * Begin a point (single-rowid) query plan on the cursor.
 *
 * Resolves the requested id to an i64 rowid (through vec0_rowid_from_id()
 * when the table has a TEXT primary key) and eagerly loads the vector of
 * every vector column for that row. A SQLITE_EMPTY result from either lookup
 * is not an error: the cursor is set up as an already-exhausted point query.
 *
 * @param pCur: cursor that receives the point state and query plan
 * @param p: virtual table
 * @param argc: number of constraint values, expected to be exactly 1
 * @param argv: argv[0] holds the requested rowid / id value
 * @return SQLITE_OK on success (including "row not found"), otherwise an
 * SQLite error code; on failure the cursor is left untouched.
 */
int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
                     sqlite3_value **argv) {
  int rc;
  assert(argc == 1);
  // Initialised because the `eof` path stores `rowid` into point_data even
  // when the TEXT primary key lookup reported SQLITE_EMPTY; nothing visible
  // here guarantees the lookup wrote to it in that case.
  i64 rowid = 0;
  struct vec0_query_point_data *point_data = NULL;

  point_data = sqlite3_malloc(sizeof(*point_data));
  if (!point_data) {
    // Nothing has been acquired yet, so return directly instead of handing a
    // NULL pointer to the cleanup helper.
    return SQLITE_NOMEM;
  }
  memset(point_data, 0, sizeof(*point_data));

  if (p->pkIsText) {
    rc = vec0_rowid_from_id(p, argv[0], &rowid);
    if (rc == SQLITE_EMPTY) {
      goto eof;
    }
    if (rc != SQLITE_OK) {
      goto error;
    }
  } else {
    rowid = sqlite3_value_int64(argv[0]);
  }

  // Load one vector per vector column up front; vec0Column_point() later
  // serves them straight out of point_data->vectors.
  for (int i = 0; i < p->numVectorColumns; i++) {
    rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL);
    if (rc == SQLITE_EMPTY) {
      goto eof;
    }
    if (rc != SQLITE_OK) {
      goto error;
    }
  }

  point_data->rowid = rowid;
  point_data->done = 0;
  pCur->point_data = point_data;
  pCur->query_plan = VEC0_QUERY_PLAN_POINT;
  return SQLITE_OK;

eof:
  // Row does not exist: hand over a point query that is immediately at EOF.
  point_data->rowid = rowid;
  point_data->done = 1;
  pCur->point_data = point_data;
  pCur->query_plan = VEC0_QUERY_PLAN_POINT;
  return SQLITE_OK;

error:
  vec0_query_point_data_clear(point_data);
  sqlite3_free(point_data);
  return rc;
}
/**
 * xFilter implementation for vec0: resets the cursor and starts the query
 * plan encoded in idxStr.
 *
 * idxStr is expected to be one query-plan character followed by exactly one
 * 4-character entry per argv value; anything else is rejected with
 * SQLITE_ERROR before dispatching.
 */
static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
                      const char *idxStr, int argc, sqlite3_value **argv) {
  vec0_cursor *cursor = (vec0_cursor *)pVtabCursor;
  vec0_vtab *vtab = (vec0_vtab *)pVtabCursor->pVtab;

  // Drop any state left over from a previous xFilter call on this cursor.
  vec0_cursor_clear(cursor);

  int len = strlen(idxStr);
  // Shape check: non-empty, (len - 1) is a multiple of 4, and the number of
  // 4-character entries equals argc.
  if (len <= 0 || (len - 1) % 4 != 0 || (len - 1) / 4 != argc) {
    return SQLITE_ERROR;
  }

  const char plan = idxStr[0];
  if (plan == VEC0_QUERY_PLAN_FULLSCAN) {
    return vec0Filter_fullscan(vtab, cursor);
  }
  if (plan == VEC0_QUERY_PLAN_KNN) {
    return vec0Filter_knn(cursor, vtab, idxNum, idxStr, argc, argv);
  }
  if (plan == VEC0_QUERY_PLAN_POINT) {
    return vec0Filter_point(cursor, vtab, argc, argv);
  }

  vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr);
  return SQLITE_ERROR;
}
/**
 * xRowid implementation for vec0.
 *
 * Full-scan cursors report column 0 of the current row of the rowid scan
 * statement; point cursors report the rowid stored in the point state. KNN
 * cursors are rejected with an error message, and any other plan value
 * yields SQLITE_ERROR.
 */
static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  if (cursor->query_plan == VEC0_QUERY_PLAN_FULLSCAN) {
    *pRowid = sqlite3_column_int64(cursor->fullscan_data->rowids_stmt, 0);
    return SQLITE_OK;
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_POINT) {
    *pRowid = cursor->point_data->rowid;
    return SQLITE_OK;
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_KNN) {
    vtab_set_error(cur->pVtab,
                   "Internal sqlite-vec error: expected point query plan in "
                   "vec0Rowid, found %d",
                   cursor->query_plan);
  }

  return SQLITE_ERROR;
}
2026-03-29 19:44:44 -07:00
/**
 * xNext implementation for vec0: advances the cursor by one row according
 * to its active query plan.
 *
 * Returns SQLITE_ERROR when the state for the active plan is missing, when
 * the full-scan statement fails to step, or when the plan is unrecognised.
 */
static int vec0Next(sqlite3_vtab_cursor *cur) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  if (cursor->query_plan == VEC0_QUERY_PLAN_FULLSCAN) {
    if (cursor->fullscan_data == NULL) {
      return SQLITE_ERROR;
    }
    // Step the rowid scan; running off the end marks the scan as done.
    switch (sqlite3_step(cursor->fullscan_data->rowids_stmt)) {
    case SQLITE_DONE:
      cursor->fullscan_data->done = 1;
      return SQLITE_OK;
    case SQLITE_ROW:
      return SQLITE_OK;
    default:
      return SQLITE_ERROR;
    }
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_KNN) {
    if (cursor->knn_data == NULL) {
      return SQLITE_ERROR;
    }
    // Results are already materialised; just move to the next slot.
    cursor->knn_data->current_idx++;
    return SQLITE_OK;
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_POINT) {
    if (cursor->point_data == NULL) {
      return SQLITE_ERROR;
    }
    // A point query yields at most one row, so any advance ends it.
    cursor->point_data->done = 1;
    return SQLITE_OK;
  }

  return SQLITE_ERROR;
}
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
/**
 * xEof implementation for vec0: reports whether the cursor has run past its
 * last row.
 *
 * A cursor whose plan-specific state is missing, or whose plan is not
 * recognised, is treated as exhausted.
 */
static int vec0Eof(sqlite3_vtab_cursor *cur) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  if (cursor->query_plan == VEC0_QUERY_PLAN_FULLSCAN) {
    if (cursor->fullscan_data == NULL) {
      return 1;
    }
    return cursor->fullscan_data->done;
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_KNN) {
    if (cursor->knn_data == NULL) {
      return 1;
    }
    // Exhausted once the index has moved past the number of results that
    // were actually produced (k_used).
    return (cursor->knn_data->current_idx >= cursor->knn_data->k_used);
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_POINT) {
    if (cursor->point_data == NULL) {
      return 1;
    }
    return cursor->point_data->done;
  }

  return 1;
}
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
/**
 * Produce the value of column `i` for the current row of a full-scan cursor.
 *
 * The current rowid is read from column 0 of the rowid scan statement and
 * the column index is classified as id, vector, distance, partition key,
 * auxiliary or metadata, in that order. An index matching none of these
 * sets no result and returns SQLITE_OK.
 *
 * @param pVtab: virtual table
 * @param pCur: cursor running a full-scan query plan
 * @param context: result context the value (or error) is written to
 * @param i: column index being requested
 * @return SQLITE_OK, or an error code when the scan state is missing or the
 * id / vector lookup fails. Partition, auxiliary and metadata lookup
 * failures are reported through `context` while still returning SQLITE_OK.
 */
static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
                               sqlite3_context *context, int i) {
  if (pCur->fullscan_data == NULL) {
    sqlite3_result_error(
        context, "Internal sqlite-vec error: fullscan_data is NULL.", -1);
    return SQLITE_ERROR;
  }

  i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);

  if (i == VEC0_COLUMN_ID) {
    return vec0_result_id(pVtab, context, rowid);
  }

  if (vec0_column_idx_is_vector(pVtab, i)) {
    int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
    void *vector;
    int vectorSize;
    int rc = vec0_get_vector_data(pVtab, rowid, vector_idx, &vector,
                                  &vectorSize);
    if (rc != SQLITE_OK) {
      return rc;
    }
    // The blob is handed to SQLite, which releases it with sqlite3_free().
    sqlite3_result_blob(context, vector, vectorSize, sqlite3_free);
    sqlite3_result_subtype(context,
                           pVtab->vector_columns[vector_idx].element_type);
    return SQLITE_OK;
  }

  if (i == vec0_column_distance_idx(pVtab)) {
    // A full scan has no query vector, so distance is always NULL.
    sqlite3_result_null(context);
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_partition(pVtab, i)) {
    int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
    sqlite3_value *value;
    int rc =
        vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &value);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
    } else {
      sqlite3_result_value(context, value);
      sqlite3_value_free(value);
    }
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_auxiliary(pVtab, i)) {
    int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
    sqlite3_value *value;
    int rc =
        vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &value);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
    } else {
      sqlite3_result_value(context, value);
      sqlite3_value_free(value);
    }
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_metadata(pVtab, i)) {
    // Skip the fetch when SQLite reports the column as unchanged.
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
    int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx,
                                                  context);
    if (rc != SQLITE_OK) {
      // IMP: V15466_32305
      char *message = sqlite3_mprintf(
          "Could not extract metadata value for column %.*s at rowid %lld",
          pVtab->metadata_columns[metadata_idx].name_length,
          pVtab->metadata_columns[metadata_idx].name, rowid);
      if (message == NULL) {
        sqlite3_result_error_nomem(context);
      } else {
        sqlite3_result_error(context, message, -1);
        sqlite3_free(message);
      }
    }
    return SQLITE_OK;
  }

  return SQLITE_OK;
}
2024-11-20 00:59:34 -08:00
2026-03-29 19:44:44 -07:00
/**
 * Produce the value of column `i` for the single row of a point cursor.
 *
 * The column index is classified as id, distance, vector, partition key,
 * auxiliary or metadata, in that order. Vector values come from the copies
 * loaded into the point state; the other kinds are looked up by rowid. An
 * index matching none of these sets no result and returns SQLITE_OK.
 *
 * @param pVtab: virtual table
 * @param pCur: cursor running a point query plan
 * @param context: result context the value (or error) is written to
 * @param i: column index being requested
 * @return SQLITE_OK, or an error code when the point state is missing or the
 * id lookup fails. Partition, auxiliary and metadata lookup failures are
 * reported through `context` while still returning SQLITE_OK.
 */
static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
                            sqlite3_context *context, int i) {
  if (pCur->point_data == NULL) {
    sqlite3_result_error(context,
                         "Internal sqlite-vec error: point_data is NULL.", -1);
    return SQLITE_ERROR;
  }

  if (i == VEC0_COLUMN_ID) {
    return vec0_result_id(pVtab, context, pCur->point_data->rowid);
  }

  if (i == vec0_column_distance_idx(pVtab)) {
    // Point lookups have no query vector, so distance is always NULL.
    sqlite3_result_null(context);
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_vector(pVtab, i)) {
    // When SQLite reports the column as unchanged, answer NULL instead of
    // copying the vector.
    if (sqlite3_vtab_nochange(context)) {
      sqlite3_result_null(context);
      return SQLITE_OK;
    }
    int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
    // SQLITE_TRANSIENT: SQLite copies the bytes; the cursor keeps its own.
    sqlite3_result_blob(
        context, pCur->point_data->vectors[vector_idx],
        vector_column_byte_size(pVtab->vector_columns[vector_idx]),
        SQLITE_TRANSIENT);
    sqlite3_result_subtype(context,
                           pVtab->vector_columns[vector_idx].element_type);
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_partition(pVtab, i)) {
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
    i64 rowid = pCur->point_data->rowid;
    sqlite3_value *value;
    int rc =
        vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &value);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
    } else {
      sqlite3_result_value(context, value);
      sqlite3_value_free(value);
    }
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_auxiliary(pVtab, i)) {
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    i64 rowid = pCur->point_data->rowid;
    int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
    sqlite3_value *value;
    int rc =
        vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &value);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
    } else {
      sqlite3_result_value(context, value);
      sqlite3_value_free(value);
    }
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_metadata(pVtab, i)) {
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    i64 rowid = pCur->point_data->rowid;
    int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
    int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx,
                                                  context);
    if (rc != SQLITE_OK) {
      char *message = sqlite3_mprintf(
          "Could not extract metadata value for column %.*s at rowid %lld",
          pVtab->metadata_columns[metadata_idx].name_length,
          pVtab->metadata_columns[metadata_idx].name, rowid);
      if (message == NULL) {
        sqlite3_result_error_nomem(context);
      } else {
        sqlite3_result_error(context, message, -1);
        sqlite3_free(message);
      }
    }
    return SQLITE_OK;
  }

  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * Produce the value of column `i` for the current result of a KNN cursor.
 *
 * The current result is the entry at knn_data->current_idx of the rowids and
 * distances arrays. The column index is classified as id, distance, vector,
 * partition key, auxiliary or metadata, in that order. An index matching
 * none of these sets no result and returns SQLITE_OK.
 *
 * @param pVtab: virtual table
 * @param pCur: cursor running a KNN query plan
 * @param context: result context the value (or error) is written to
 * @param i: column index being requested
 * @return SQLITE_OK, or an error code when the KNN state is missing or the
 * id / vector lookup fails. Partition, auxiliary and metadata lookup
 * failures are reported through `context` while still returning SQLITE_OK.
 */
static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
                          sqlite3_context *context, int i) {
  if (pCur->knn_data == NULL) {
    sqlite3_result_error(context,
                         "Internal sqlite-vec error: knn_data is NULL.", -1);
    return SQLITE_ERROR;
  }

  if (i == VEC0_COLUMN_ID) {
    i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
    return vec0_result_id(pVtab, context, rowid);
  }

  if (i == vec0_column_distance_idx(pVtab)) {
    sqlite3_result_double(
        context, pCur->knn_data->distances[pCur->knn_data->current_idx]);
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_vector(pVtab, i)) {
    int vector_idx = vec0_column_idx_to_vector_idx(pVtab, i);
    void *vector;
    int vectorSize;
    int rc = vec0_get_vector_data(
        pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vector_idx,
        &vector, &vectorSize);
    if (rc != SQLITE_OK) {
      return rc;
    }
    // The blob is handed to SQLite, which releases it with sqlite3_free().
    sqlite3_result_blob(context, vector, vectorSize, sqlite3_free);
    sqlite3_result_subtype(context,
                           pVtab->vector_columns[vector_idx].element_type);
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_partition(pVtab, i)) {
    int partition_idx = vec0_column_idx_to_partition_idx(pVtab, i);
    i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
    sqlite3_value *value;
    int rc =
        vec0_get_partition_value_for_rowid(pVtab, rowid, partition_idx, &value);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
    } else {
      sqlite3_result_value(context, value);
      sqlite3_value_free(value);
    }
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_auxiliary(pVtab, i)) {
    int auxiliary_idx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
    i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
    sqlite3_value *value;
    int rc =
        vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxiliary_idx, &value);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
    } else {
      sqlite3_result_value(context, value);
      sqlite3_value_free(value);
    }
    return SQLITE_OK;
  }

  if (vec0_column_idx_is_metadata(pVtab, i)) {
    int metadata_idx = vec0_column_idx_to_metadata_idx(pVtab, i);
    i64 rowid = pCur->knn_data->rowids[pCur->knn_data->current_idx];
    int rc = vec0_result_metadata_value_for_rowid(pVtab, rowid, metadata_idx,
                                                  context);
    if (rc != SQLITE_OK) {
      char *message = sqlite3_mprintf(
          "Could not extract metadata value for column %.*s at rowid %lld",
          pVtab->metadata_columns[metadata_idx].name_length,
          pVtab->metadata_columns[metadata_idx].name, rowid);
      if (message == NULL) {
        sqlite3_result_error_nomem(context);
      } else {
        sqlite3_result_error(context, message, -1);
        sqlite3_free(message);
      }
    }
    return SQLITE_OK;
  }

  return SQLITE_OK;
}
/**
 * xColumn implementation for vec0: forwards the request to the column
 * handler that matches the cursor's active query plan. A cursor with an
 * unrecognised plan sets no result and returns SQLITE_OK.
 */
static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
                      int i) {
  vec0_cursor *cursor = (vec0_cursor *)cur;
  vec0_vtab *vtab = (vec0_vtab *)cur->pVtab;

  if (cursor->query_plan == VEC0_QUERY_PLAN_FULLSCAN) {
    return vec0Column_fullscan(vtab, cursor, context, i);
  }
  if (cursor->query_plan == VEC0_QUERY_PLAN_KNN) {
    return vec0Column_knn(vtab, cursor, context, i);
  }
  if (cursor->query_plan == VEC0_QUERY_PLAN_POINT) {
    return vec0Column_point(vtab, cursor, context, i);
  }
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Performs the "insert rowid" step of inserting a row into a vec0
 * table, by adding a new entry to the _rowids shadow table.
 *
 * Three kinds of insert are distinguished:
 *   1) INTEGER primary key (the default), with an i64 rowid supplied
 *   2) INTEGER primary key (the default), with no rowid supplied (NULL)
 *   3) TEXT primary key, with a text id supplied
 *
 * @param p: virtual table
 * @param idValue: Value containing the inserted rowid/id value.
 * @param rowid: Output rowid, will point to the "real" i64 rowid value that
 * was inserted
 * @return int SQLITE_OK on success, error code on failure
 */
int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
                               i64 *rowid) {
  // Case 3: the table was declared with a TEXT primary key, so only a text
  // id is acceptable.
  if (p->pkIsText) {
    if (sqlite3_value_type(idValue) != SQLITE_TEXT) {
      // IMP: V04200_21039
      vtab_set_error(&p->base,
                     "The %s virtual table was declared with a TEXT primary "
                     "key, but a non-TEXT value was provided in an INSERT.",
                     p->tableName);
      return SQLITE_ERROR;
    }
    return vec0_rowids_insert_id(p, idValue, rowid);
  }

  switch (sqlite3_value_type(idValue)) {
  case SQLITE_INTEGER: {
    // Case 1: the user supplied an i64 rowid; report it back only if the
    // insert succeeded.
    i64 requested = sqlite3_value_int64(idValue);
    int rc = vec0_rowids_insert_rowid(p, requested);
    if (rc == SQLITE_OK) {
      *rowid = requested;
    }
    return rc;
  }
  case SQLITE_NULL:
    // Case 2: the user did not supply a rowid; pass NULL to get the next
    // auto-incremented value.
    return vec0_rowids_insert_id(p, NULL, rowid);
  default:
    // IMP: V30855_14925
    vtab_set_error(&p->base,
                   "Only integers are allows for primary key values on %s",
                   p->tableName);
    return SQLITE_ERROR;
  }
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Determines the "next available" chunk position for a newly inserted
 * vec0 row.
 *
 * This operation may insert a new "blank" chunk into the _chunks table, if
 * there is no more space in previous chunks.
 *
 * @param p: virtual table
 * @param partitionKeyValues: array of partition key column values, to constrain
 * against any partition key columns.
 * @param chunk_rowid: Output rowid of the chunk in the _chunks virtual table
 * that has the availability.
 * @param chunk_offset: Output index of the available slot within the chunk,
 * based on the index of the first available validity bit.
 * @param blobChunksValidity: Output blob of the validity column of the
 * available chunk. Will be opened with read/write permissions.
 * @param bufferChunksValidity: Output buffer of the original chunk's validity
 * column. Needs to be cleaned up with sqlite3_free().
 * @return int SQLITE_OK on success, error code on failure
 */
int vec0Update_InsertNextAvailableStep(
    vec0_vtab *p, sqlite3_value **partitionKeyValues, i64 *chunk_rowid,
    i64 *chunk_offset, sqlite3_blob **blobChunksValidity,
    const unsigned char **bufferChunksValidity) {
  // Ownership note: this function never releases *blobChunksValidity or
  // *bufferChunksValidity on its error paths (other than when swapping the
  // full chunk for a new one), so whatever they hold on return is left for the
  // caller to close/free.
  // NOTE(review): the SQLITE_EMPTY path below closes/frees whatever these
  // outputs held on entry, so the caller presumably initializes both to NULL
  // -- confirm against the call site.

  int rc;
  i64 validitySize;
  // -1 is the "no free slot found" sentinel that is re-checked at `done`.
  *chunk_offset = -1;

  rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues);
  if (rc == SQLITE_EMPTY) {
    // No latest chunk was found: skip the bitmap scan, chunk_offset stays -1
    // and a new chunk gets created below.
    goto done;
  }
  if (rc != SQLITE_OK) {
    goto cleanup;
  }

  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
                         *chunk_rowid, 1, blobChunksValidity);
  if (rc != SQLITE_OK) {
    // IMP: V22053_06123
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "could not open validity blob on %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, *chunk_rowid);
    goto cleanup;
  }

  // One validity bit per slot, so the blob must be chunk_size / CHAR_BIT bytes.
  validitySize = sqlite3_blob_bytes(*blobChunksValidity);
  if (validitySize != p->chunk_size / CHAR_BIT) {
    // IMP: V29362_13432
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "validity blob size mismatch on "
                   "%s.%s.%lld, expected %lld but received %lld.",
                   p->schemaName, p->shadowChunksName, *chunk_rowid,
                   (i64)(p->chunk_size / CHAR_BIT), validitySize);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  *bufferChunksValidity = sqlite3_malloc(validitySize);
  if (!(*bufferChunksValidity)) {
    vtab_set_error(&p->base, VEC_INTERAL_ERROR
                   "Could not allocate memory for validity bitmap");
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
                         validitySize, 0);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "Could not read validity bitmap for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, *chunk_rowid);
    goto cleanup;
  }

  // find the next available offset, ie first `0` in the bitmap.
  for (int i = 0; i < validitySize; i++) {
    // A byte with every bit set has no free slot. Spelled 0xFF rather than a
    // 0b... literal, which is not standard C before C23.
    if ((*bufferChunksValidity)[i] == 0xFF)
      continue;
    for (int j = 0; j < CHAR_BIT; j++) {
      if ((((*bufferChunksValidity)[i] >> j) & 1) == 0) {
        *chunk_offset = (i * CHAR_BIT) + j;
        goto done;
      }
    }
  }

done:
  // latest chunk was full (or missing), so need to create a new one
  if (*chunk_offset == -1) {
    rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid);
    if (rc != SQLITE_OK) {
      // IMP: V08441_25279
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR "Could not insert a new vector chunk");
      rc = SQLITE_ERROR; // otherwise raises a DatabaseError and not operational
                         // error?
      goto cleanup;
    }
    *chunk_offset = 0;

    // blobChunksValidity and bufferChunksValidity are stale, pointing to the
    // previous (full) chunk. Release them and re-assign them to the new chunk.
    rc = sqlite3_blob_close(*blobChunksValidity);
    sqlite3_free((void *)*bufferChunksValidity);
    *blobChunksValidity = NULL;
    *bufferChunksValidity = NULL;
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "unknown error, blobChunksValidity could not be closed, "
                     "please file an issue.");
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName,
                           "validity", *chunk_rowid, 1, blobChunksValidity);
    if (rc != SQLITE_OK) {
      vtab_set_error(
          &p->base,
          VEC_INTERAL_ERROR
          "Could not open validity blob for newly created chunk %s.%s.%lld",
          p->schemaName, p->shadowChunksName, *chunk_rowid);
      goto cleanup;
    }

    validitySize = sqlite3_blob_bytes(*blobChunksValidity);
    if (validitySize != p->chunk_size / CHAR_BIT) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "validity blob size mismatch for newly created chunk "
                     "%s.%s.%lld. Exepcted %lld, got %lld",
                     p->schemaName, p->shadowChunksName, *chunk_rowid,
                     (i64)(p->chunk_size / CHAR_BIT), validitySize);
      // rc still holds SQLITE_OK from the successful blob open above; it must
      // be set explicitly or the caller would see success.
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    *bufferChunksValidity = sqlite3_malloc(validitySize);
    if (!(*bufferChunksValidity)) {
      // Without this check the blob read below would write through NULL.
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "Could not allocate memory for validity bitmap");
      rc = SQLITE_NOMEM;
      goto cleanup;
    }

    rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
                           validitySize, 0);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "could not read validity blob newly created chunk "
                     "%s.%s.%lld",
                     p->schemaName, p->shadowChunksName, *chunk_rowid);
      goto cleanup;
    }
  }

  rc = SQLITE_OK;

cleanup:
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Write the vector data into the provided vector blob at the given
 * offset
 *
 * The byte length of one vector and its byte position inside the blob are
 * derived from the element type: 4 bytes per dimension for float32, 1 byte per
 * dimension for int8, and 1 bit per dimension for bit vectors.
 *
 * @param blobVectors SQLite BLOB to write to
 * @param chunk_offset the "offset" (ie validity bitmap position) to write the
 * vector to
 * @param bVector pointer to the vector containing data
 * @param dimensions how many dimensions the vector has
 * @param element_type the vector type
 * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise
 * failure. SQLITE_ERROR if element_type is not one of the handled types.
 */
static int
vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset,
                                 const void *bVector, size_t dimensions,
                                 enum VectorElementType element_type) {
  // sqlite3_blob_write() takes both the length and the offset as int.
  int n;
  int offset;

  switch (element_type) {
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
    n = dimensions * sizeof(f32);
    offset = chunk_offset * dimensions * sizeof(f32);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_INT8:
    n = dimensions * sizeof(i8);
    offset = chunk_offset * dimensions * sizeof(i8);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    n = dimensions / CHAR_BIT;
    offset = chunk_offset * dimensions / CHAR_BIT;
    break;
  default:
    // Unknown element type: refuse to write rather than pass uninitialized
    // n/offset to sqlite3_blob_write().
    return SQLITE_ERROR;
  }

  return sqlite3_blob_write(blobVectors, bVector, n, offset);
}
/**
 * @brief Write a single vector column's data into the "vectors" blob of its
 * vector-chunks shadow table, at the slot given by chunk_offset.
 *
 * Opens the blob read/write, checks its size against
 * chunk_size * vector_column_byte_size(column), writes the vector and closes
 * the blob again. An error message is set on the vtab for every failure.
 *
 * @param p vec0 virtual table
 * @param column_idx index of the vector column to write
 * @param chunk_rowid which chunk to write to
 * @param chunk_offset the slot inside the chunk to write the vector to
 * @param vectorData the vector data for this column
 * @return int SQLITE_OK on success; the sqlite3_blob_open() result code if the
 * blob could not be opened; SQLITE_ERROR for every other failure
 */
static int vec0_insert_write_vector_column(vec0_vtab *p, int column_idx,
                                           i64 chunk_rowid, i64 chunk_offset,
                                           const void *vectorData) {
  sqlite3_blob *vectorsBlob;
  int rc = sqlite3_blob_open(p->db, p->schemaName,
                             p->shadowVectorChunksNames[column_idx], "vectors",
                             chunk_rowid, 1, &vectorsBlob);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[column_idx],
                   chunk_rowid);
    return rc;
  }

  i64 wantBytes =
      p->chunk_size * vector_column_byte_size(p->vector_columns[column_idx]);
  i64 gotBytes = sqlite3_blob_bytes(vectorsBlob);
  if (gotBytes != wantBytes) {
    // IMP: V16386_00456
    vtab_set_error(
        &p->base,
        VEC_INTERAL_ERROR
        "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
        p->schemaName, p->shadowVectorChunksNames[column_idx], chunk_rowid,
        wantBytes, gotBytes);
    // already failing, so the close result is deliberately ignored
    sqlite3_blob_close(vectorsBlob);
    return SQLITE_ERROR;
  }

  rc = vec0_write_vector_to_vector_blob(
      vectorsBlob, chunk_offset, vectorData,
      p->vector_columns[column_idx].dimensions,
      p->vector_columns[column_idx].element_type);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "could not write vector blob on %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[column_idx],
                   chunk_rowid);
    // already failing, so the close result is deliberately ignored
    sqlite3_blob_close(vectorsBlob);
    return SQLITE_ERROR;
  }

  if (sqlite3_blob_close(vectorsBlob) != SQLITE_OK) {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "could not close vector blob on %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[column_idx],
                   chunk_rowid);
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}

/**
 * @brief Final write step of an insert: mark the row's slot as valid, store
 * its vectors, record its rowid in the chunk, and update the row's position.
 *
 * Steps, in order:
 *  1. set the validity bit for chunk_offset in the chunk's validity blob
 *  2. write each FLAT-indexed vector column into its vector chunk blob
 *  3. write rowid into the chunk's "rowids" blob at chunk_offset
 *  4. call vec0_rowids_update_position() with the new chunk_rowid/chunk_offset
 *
 * @param p vec0 virtual table
 * @param chunk_rowid: which chunk to write to
 * @param chunk_offset: the offset inside the chunk to write the vector to.
 * @param rowid: the rowid of the inserting row
 * @param vectorDatas: array of the vector data to insert, indexed by vector
 * column
 * @param blobChunksValidity: writeable validity blob of the row's assigned
 * chunk.
 * @param bufferChunksValidity: snapshot buffer of the validity column from the
 * row's assigned chunk.
 * @return int SQLITE_OK on success, error code on failure
 */
int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
                                    i64 chunk_offset, i64 rowid,
                                    void *vectorDatas[],
                                    sqlite3_blob *blobChunksValidity,
                                    const unsigned char *bufferChunksValidity) {
  int rc;
  int closeRc;
  sqlite3_blob *rowidsBlob = NULL;
  const i64 rowidsWantBytes = p->chunk_size * sizeof(i64);
  i64 rowidsGotBytes;

  // Take the snapshot byte holding this slot's bit, set the bit, and write
  // just that one byte back into the validity blob.
  const i64 validityByteIdx = chunk_offset / CHAR_BIT;
  unsigned char validityByte = bufferChunksValidity[validityByteIdx];
  validityByte |= (1 << (chunk_offset % CHAR_BIT));
  rc = sqlite3_blob_write(blobChunksValidity, &validityByte, 1,
                          validityByteIdx);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, VEC_INTERAL_ERROR "could not mark validity bit");
    return rc;
  }

  // Store the vector data in the vector chunk shadow tables. Non-FLAT columns
  // (rescore, IVF, DiskANN) don't use _vector_chunks, so they are skipped.
  for (int col = 0; col < p->numVectorColumns; col++) {
    if (p->vector_columns[col].index_type == VEC0_INDEX_TYPE_FLAT) {
      rc = vec0_insert_write_vector_column(p, col, chunk_rowid, chunk_offset,
                                           vectorDatas[col]);
      if (rc != SQLITE_OK) {
        goto cleanup;
      }
    }
  }

  // Record the new rowid in the rowids column of the _chunks table.
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
                         chunk_rowid, 1, &rowidsBlob);
  if (rc != SQLITE_OK) {
    // IMP: V09221_26060
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR "could not open rowids blob on %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_rowid);
    goto cleanup;
  }

  rowidsGotBytes = sqlite3_blob_bytes(rowidsBlob);
  if (rowidsWantBytes != rowidsGotBytes) {
    // IMP: V12779_29618
    vtab_set_error(
        &p->base,
        VEC_INTERAL_ERROR
        "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
        p->schemaName, p->shadowChunksName, chunk_rowid, rowidsWantBytes,
        rowidsGotBytes);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  rc = sqlite3_blob_write(rowidsBlob, &rowid, sizeof(i64),
                          chunk_offset * sizeof(i64));
  if (rc != SQLITE_OK) {
    vtab_set_error(
        &p->base, VEC_INTERAL_ERROR "could not write rowids blob on %s.%s.%lld",
        p->schemaName, p->shadowChunksName, chunk_rowid);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  // With all the vectors inserted, point the _rowids entry for this row at its
  // new chunk_rowid/chunk_offset.
  rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset);

cleanup:
  // rowidsBlob is still NULL when we arrive here before it was opened.
  closeRc = sqlite3_blob_close(rowidsBlob);
  if (rc != SQLITE_OK || closeRc == SQLITE_OK) {
    // An earlier failure takes precedence over a close failure.
    return rc;
  }
  vtab_set_error(
      &p->base, VEC_INTERAL_ERROR "could not close rowids blob on %s.%s.%lld",
      p->schemaName, p->shadowChunksName, chunk_rowid);
  return closeRc;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Writes one metadata value into its slot of a metadata chunk blob.
 *
 * The value is first checked against the declared kind of the metadata
 * column, then written into the "data" blob of the metadata chunks shadow
 * table for that column. TEXT values always get a fixed-size "view" written
 * into the chunk (length prefix followed by the leading bytes of the string);
 * values longer than VEC0_METADATA_TEXT_VIEW_DATA_LENGTH additionally have
 * their full contents stored in the metadata text shadow table, keyed by
 * rowid.
 *
 * @param p vec0 table being written to
 * @param metadata_column_idx index into p->metadata_columns
 * @param rowid rowid of the row the value belongs to
 * @param chunk_id rowid of the metadata chunk holding the slot
 * @param chunk_offset slot index within that chunk
 * @param v value to store
 * @param isupdate non-zero when overwriting an existing row; together with
 *        the previously stored length this selects UPDATE over INSERT for
 *        the long-text shadow row
 * @return int SQLITE_OK on success, otherwise an error code. The blob handle
 *         is closed on every path, and a failed blob write is reported rather
 *         than masked by the result of closing the blob.
 */
int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid,
                              i64 chunk_id, i64 chunk_offset, sqlite3_value *v,
                              int isupdate) {
  int rc;
  int brc;
  // Declared up front (and NULL) so the shared exit path can always close it.
  sqlite3_blob *blobValue = NULL;
  struct Vec0MetadataColumnDefinition *metadata_column =
      &p->metadata_columns[metadata_column_idx];
  vec0_metadata_column_kind kind = metadata_column->kind;

  // verify input value matches column type
  switch (kind) {
  case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
    if (sqlite3_value_type(v) != SQLITE_INTEGER ||
        ((sqlite3_value_int(v) != 0) && (sqlite3_value_int(v) != 1))) {
      rc = SQLITE_ERROR;
      vtab_set_error(&p->base,
                     "Expected 0 or 1 for BOOLEAN metadata column %.*s",
                     metadata_column->name_length, metadata_column->name);
      goto done;
    }
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_INTEGER: {
    if (sqlite3_value_type(v) != SQLITE_INTEGER) {
      rc = SQLITE_ERROR;
      vtab_set_error(
          &p->base,
          "Expected integer for INTEGER metadata column %.*s, received %s",
          metadata_column->name_length, metadata_column->name,
          type_name(sqlite3_value_type(v)));
      goto done;
    }
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_FLOAT: {
    if (sqlite3_value_type(v) != SQLITE_FLOAT) {
      rc = SQLITE_ERROR;
      vtab_set_error(
          &p->base,
          "Expected float for FLOAT metadata column %.*s, received %s",
          metadata_column->name_length, metadata_column->name,
          type_name(sqlite3_value_type(v)));
      goto done;
    }
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_TEXT: {
    if (sqlite3_value_type(v) != SQLITE_TEXT) {
      rc = SQLITE_ERROR;
      vtab_set_error(
          &p->base,
          "Expected text for TEXT metadata column %.*s, received %s",
          metadata_column->name_length, metadata_column->name,
          type_name(sqlite3_value_type(v)));
      goto done;
    }
    break;
  }
  }

  rc = sqlite3_blob_open(p->db, p->schemaName,
                         p->shadowMetadataChunksNames[metadata_column_idx],
                         "data", chunk_id, 1, &blobValue);
  if (rc != SQLITE_OK) {
    goto done;
  }

  switch (kind) {
  case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
    // Booleans are bit-packed: read the containing byte, flip one bit, write
    // the byte back.
    u8 block;
    int value = sqlite3_value_int(v);
    rc = sqlite3_blob_read(blobValue, &block, sizeof(u8),
                           (int)(chunk_offset / CHAR_BIT));
    if (rc != SQLITE_OK) {
      goto done;
    }

    if (value) {
      block |= 1 << (chunk_offset % CHAR_BIT);
    } else {
      block &= ~(1 << (chunk_offset % CHAR_BIT));
    }

    rc = sqlite3_blob_write(blobValue, &block, sizeof(u8),
                            chunk_offset / CHAR_BIT);
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_INTEGER: {
    i64 value = sqlite3_value_int64(v);
    rc = sqlite3_blob_write(blobValue, &value, sizeof(value),
                            chunk_offset * sizeof(i64));
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_FLOAT: {
    double value = sqlite3_value_double(v);
    rc = sqlite3_blob_write(blobValue, &value, sizeof(value),
                            chunk_offset * sizeof(double));
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_TEXT: {
    // Length of whatever text previously occupied this slot; decides whether
    // a long-text shadow row already exists for it.
    int prev_n;
    rc = sqlite3_blob_read(blobValue, &prev_n, sizeof(int),
                           chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    if (rc != SQLITE_OK) {
      goto done;
    }

    const char *s = (const char *)sqlite3_value_text(v);
    int n = sqlite3_value_bytes(v);
    if (s == NULL && n > 0) {
      // sqlite3_value_text() yields NULL on allocation failure.
      rc = SQLITE_NOMEM;
      goto done;
    }

    u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
    memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    memcpy(view, &n, sizeof(int));
    if (s != NULL) {
      memcpy(view + 4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH - 4));
    }

    rc = sqlite3_blob_write(blobValue, view,
                            VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH,
                            chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    if (rc != SQLITE_OK) {
      goto done;
    }

    if (n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
      // Text does not fit in the view: store the full value in the text
      // shadow table.
      char *zSql;
      if (isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)) {
        zSql = sqlite3_mprintf(
            "UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME
            " SET data = ?2 WHERE rowid = ?1",
            p->schemaName, p->tableName, metadata_column_idx);
      } else {
        zSql = sqlite3_mprintf(
            "INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME
            " (rowid, data) VALUES (?1, ?2)",
            p->schemaName, p->tableName, metadata_column_idx);
      }
      if (!zSql) {
        rc = SQLITE_NOMEM;
        goto done;
      }
      sqlite3_stmt *stmt = NULL;
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
      sqlite3_free(zSql);
      if (rc != SQLITE_OK) {
        goto done;
      }
      sqlite3_bind_int64(stmt, 1, rowid);
      sqlite3_bind_text(stmt, 2, s, n, SQLITE_STATIC);
      rc = sqlite3_step(stmt);
      sqlite3_finalize(stmt);

      if (rc != SQLITE_DONE) {
        rc = SQLITE_ERROR;
        goto done;
      }
      rc = SQLITE_OK;
    } else if (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
      // New text fits in the view but the old one did not: drop the now
      // stale long-text shadow row.
      char *zSql = sqlite3_mprintf(
          "DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME
          " WHERE rowid = ?",
          p->schemaName, p->tableName, metadata_column_idx);
      if (!zSql) {
        rc = SQLITE_NOMEM;
        goto done;
      }
      sqlite3_stmt *stmt = NULL;
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
      sqlite3_free(zSql);
      if (rc != SQLITE_OK) {
        goto done;
      }
      sqlite3_bind_int64(stmt, 1, rowid);
      rc = sqlite3_step(stmt);
      sqlite3_finalize(stmt);

      if (rc != SQLITE_DONE) {
        rc = SQLITE_ERROR;
        goto done;
      }
      rc = SQLITE_OK;
    }
    break;
  }
  }

done:
  // Closing a NULL handle is a no-op, so this is safe on the early-exit
  // paths too. Only surface the close result when nothing failed earlier.
  brc = sqlite3_blob_close(blobValue);
  if (rc == SQLITE_OK) {
    rc = brc;
  }
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Handles INSERT INTO operations on a vec0 table.
 *
 * Validates the partition key, vector, and hidden column arguments, reserves
 * a rowid, then writes the row through each storage step below (chunk
 * position, vectors, optional index structures, auxiliary columns, metadata
 * columns).
 *
 * @param pVTab the vec0 virtual table
 * @param argc number of entries in argv (unused)
 * @param argv xUpdate-style argument array; user column i is read from
 *        argv[2 + VEC0_COLUMN_USERN_START + i]
 * @param pRowid receives the rowid of the inserted row on success
 * @return int SQLITE_OK on success, otherwise error code on failure
 */
int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
                      sqlite_int64 *pRowid) {
  UNUSED_PARAMETER(argc);
  vec0_vtab *p = (vec0_vtab *)pVTab;
  int rc;
  // Rowid for the inserted row, determined by the inserted ID + _rowids shadow
  // table
  i64 rowid;
  // Array to hold the vector data of the inserted row. Individual elements will
  // have a lifetime bound to the argv[..] values.
  void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS];
  // Array to hold cleanup functions for vectorDatas[]
  vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS];
  sqlite3_value *partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS];
  // Rowid of the chunk in the _chunks shadow table that the row will be a part
  // of. Zero-initialized so later steps never read an indeterminate value when
  // the chunk-position step is skipped.
  i64 chunk_rowid = 0;
  // offset within the chunk where the rowid belongs
  i64 chunk_offset = 0;
  // a write-able blob of the validity column for the given chunk. Used to mark
  // validity bit
  sqlite3_blob *blobChunksValidity = NULL;
  // buffer for the validity column for the given chunk. Maybe not needed here?
  const unsigned char *bufferChunksValidity = NULL;
  int numReadVectors = 0;

  // Read all provided partition key values into partitionKeyValues
  for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
    if (p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
      continue;
    }
    int partition_key_idx = p->user_column_idxs[i];
    partitionKeyValues[partition_key_idx] =
        argv[2 + VEC0_COLUMN_USERN_START + i];

    int new_value_type =
        sqlite3_value_type(partitionKeyValues[partition_key_idx]);
    if ((new_value_type != SQLITE_NULL) &&
        (new_value_type != p->paritition_columns[partition_key_idx].type)) {
      // IMP: V11454_28292
      vtab_set_error(
          pVTab,
          "Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.",
          p->paritition_columns[partition_key_idx].name_length,
          p->paritition_columns[partition_key_idx].name,
          type_name(p->paritition_columns[partition_key_idx].type),
          type_name(new_value_type));
      rc = SQLITE_ERROR;
      goto cleanup;
    }
  }

  // read all the inserted vectors into vectorDatas, validate their lengths.
  for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
    if (p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
      continue;
    }
    int vector_column_idx = p->user_column_idxs[i];
    sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START + i];
    size_t dimensions;

    char *pzError;
    enum VectorElementType elementType;
    rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx],
                           &dimensions, &elementType,
                           &cleanups[vector_column_idx], &pzError);
    if (rc != SQLITE_OK) {
      // IMP: V06519_23358
      vtab_set_error(
          pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z",
          p->vector_columns[vector_column_idx].name_length,
          p->vector_columns[vector_column_idx].name, pzError);
      rc = SQLITE_ERROR;
      goto cleanup;
    }
    numReadVectors++;

    if (elementType != p->vector_columns[vector_column_idx].element_type) {
      // IMP: V08221_25059
      // Report against the vector column, not the user-column position `i`:
      // the two differ as soon as non-vector columns are declared.
      vtab_set_error(
          pVTab,
          "Inserted vector for the \"%.*s\" column is expected to be of type "
          "%s, but a %s vector was provided.",
          p->vector_columns[vector_column_idx].name_length,
          p->vector_columns[vector_column_idx].name,
          vector_subtype_name(
              p->vector_columns[vector_column_idx].element_type),
          vector_subtype_name(elementType));
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    if (dimensions != p->vector_columns[vector_column_idx].dimensions) {
      // IMP: V01145_17984
      // %d consumes an int, so narrow explicitly rather than passing a wider
      // integer through varargs.
      vtab_set_error(
          pVTab,
          "Dimension mismatch for inserted vector for the \"%.*s\" column. "
          "Expected %d dimensions but received %d.",
          p->vector_columns[vector_column_idx].name_length,
          p->vector_columns[vector_column_idx].name,
          (int)p->vector_columns[vector_column_idx].dimensions,
          (int)dimensions);
      rc = SQLITE_ERROR;
      goto cleanup;
    }
  }

  // Cannot insert a value in the hidden "distance" column
  if (sqlite3_value_type(argv[2 + vec0_column_distance_idx(p)]) !=
      SQLITE_NULL) {
    // IMP: V24228_08298
    vtab_set_error(pVTab,
                   "A value was provided for the hidden \"distance\" column.");
    rc = SQLITE_ERROR;
    goto cleanup;
  }
  // Cannot insert a value in the hidden "k" column
  if (sqlite3_value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL) {
    // IMP: V11875_28713
    vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column.");
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  // Step #1: Insert/get a rowid for this row, from the _rowids table.
  rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID], &rowid);
  if (rc != SQLITE_OK) {
    goto cleanup;
  }

  if (!vec0_all_columns_diskann(p)) {
    // Step #2: Find the next "available" position in the _chunks table for this
    // row.
    rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues,
                                            &chunk_rowid, &chunk_offset,
                                            &blobChunksValidity,
                                            &bufferChunksValidity);
    if (rc != SQLITE_OK) {
      goto cleanup;
    }

    // Step #3: With the next available chunk position, write out all the vectors
    // to their specified location.
    rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid,
                                         vectorDatas, blobChunksValidity,
                                         bufferChunksValidity);
    if (rc != SQLITE_OK) {
      goto cleanup;
    }
  }

#if SQLITE_VEC_ENABLE_DISKANN
  // Step #4: Insert into DiskANN graph for indexed vector columns
  for (int i = 0; i < p->numVectorColumns; i++) {
    if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_DISKANN)
      continue;
    rc = diskann_insert(p, i, rowid, vectorDatas[i]);
    if (rc != SQLITE_OK) {
      goto cleanup;
    }
  }
#endif

#if SQLITE_VEC_ENABLE_RESCORE
  rc = rescore_on_insert(p, chunk_rowid, chunk_offset, rowid, vectorDatas);
  if (rc != SQLITE_OK) {
    goto cleanup;
  }
#endif

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
  // Step #4: IVF index insert (if any vector column uses IVF)
  for (int i = 0; i < p->numVectorColumns; i++) {
    if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_IVF)
      continue;
    int vecSize = (int)vector_column_byte_size(p->vector_columns[i]);
    rc = ivf_insert(p, i, rowid, vectorDatas[i], vecSize);
    if (rc != SQLITE_OK) {
      goto cleanup;
    }
  }
#endif

  if (p->numAuxiliaryColumns > 0) {
    // Build "INSERT INTO <aux>(rowid, value00, ...) VALUES (?, ?, ...)" with
    // one placeholder per auxiliary column.
    sqlite3_stmt *stmt = NULL;
    sqlite3_str *s = sqlite3_str_new(NULL);
    sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME "(rowid",
                        p->schemaName, p->tableName);
    for (int i = 0; i < p->numAuxiliaryColumns; i++) {
      sqlite3_str_appendf(s, ", value%02d", i);
    }
    sqlite3_str_appendall(s, ") VALUES (?");
    for (int i = 0; i < p->numAuxiliaryColumns; i++) {
      sqlite3_str_appendall(s, ", ?");
    }
    sqlite3_str_appendall(s, ")");
    char *zSql = sqlite3_str_finish(s);
    if (!zSql) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
    // The SQL text is no longer needed once prepared (or once prepare failed).
    sqlite3_free(zSql);
    if (rc != SQLITE_OK) {
      goto cleanup;
    }
    sqlite3_bind_int64(stmt, 1, rowid);

    for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
      if (p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
        continue;
      }
      int auxiliary_key_idx = p->user_column_idxs[i];
      sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START + i];
      int v_type = sqlite3_value_type(v);
      if (v_type != SQLITE_NULL &&
          (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) {
        sqlite3_finalize(stmt);
        rc = SQLITE_CONSTRAINT;
        vtab_set_error(
            pVTab,
            "Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.",
            p->auxiliary_columns[auxiliary_key_idx].name_length,
            p->auxiliary_columns[auxiliary_key_idx].name,
            type_name(p->auxiliary_columns[auxiliary_key_idx].type),
            type_name(v_type));
        goto cleanup;
      }
      // first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to
      // account for initial rowid parameter
      sqlite3_bind_value(stmt, 1 + 1 + auxiliary_key_idx, v);
    }

    rc = sqlite3_step(stmt);
    if (rc != SQLITE_DONE) {
      sqlite3_finalize(stmt);
      rc = SQLITE_ERROR;
      goto cleanup;
    }
    sqlite3_finalize(stmt);
  }

  // Write each metadata column value into its chunk slot.
  for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
    if (p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
      continue;
    }
    int metadata_idx = p->user_column_idxs[i];
    sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START + i];
    rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid,
                                   chunk_offset, v, 0);
    if (rc != SQLITE_OK) {
      goto cleanup;
    }
  }

  *pRowid = rowid;
  rc = SQLITE_OK;

cleanup:
  // NOTE(review): releases slots 0..numReadVectors-1, which presumes vector
  // columns are read in ascending vector_column_idx order — confirm.
  for (int i = 0; i < numReadVectors; i++) {
    cleanups[i](vectorDatas[i]);
  }
  sqlite3_free((void *)bufferChunksValidity);
  int brc = sqlite3_blob_close(blobChunksValidity);
  if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR "unknown error, blobChunksValidity could "
                                     "not be closed, please file an issue");
    return brc;
  }
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Clears the validity bit of one slot in a chunk's "validity" blob.
 *
 * Opens the validity blob of the given chunk for writing, checks that the
 * bit for chunk_offset is currently set, clears it, and writes the byte back.
 *
 * @param p vec0 table being modified
 * @param chunk_id rowid of the chunk in the _chunks shadow table
 * @param chunk_offset slot index within the chunk
 * @return int SQLITE_OK on success; SQLITE_ERROR if the blob cannot be opened
 *         or the bit was not set; otherwise the failing blob read/write/close
 *         code
 */
int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id,
                                    u64 chunk_offset) {
  int rc, brc;
  sqlite3_blob *blobChunksValidity = NULL;
  unsigned char bx;
  int validityOffset = chunk_offset / CHAR_BIT;
  // Position of this slot's bit inside its validity byte.
  unsigned int bit = (unsigned int)(chunk_offset % CHAR_BIT);

  // 2. ensure chunks.validity bit is 1, then set to 0
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
                         chunk_id, 1, &blobChunksValidity);
  if (rc != SQLITE_OK) {
    // IMP: V26002_10073
    vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return SQLITE_ERROR;
  }
  // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now,
  // the read below would catch it

  rc = sqlite3_blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset);
  if (rc != SQLITE_OK) {
    // IMP: V21193_05263
    vtab_set_error(
        &p->base, "could not read validity blob for %s.%s.%lld at %d",
        p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
    goto cleanup;
  }

  // Test exactly this slot's bit. Without the mask, any higher bit set in the
  // same byte would make an already-cleared slot look valid.
  if (!((bx >> bit) & 1u)) {
    // IMP: V21193_05263
    rc = SQLITE_ERROR;
    vtab_set_error(
        &p->base,
        "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d",
        p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
    goto cleanup;
  }

  unsigned char result = (unsigned char)(bx & ~(1u << bit));
  rc = sqlite3_blob_write(blobChunksValidity, &result, sizeof(result),
                          validityOffset);
  if (rc != SQLITE_OK) {
    vtab_set_error(
        &p->base, "could not write to validity blob for %s.%s.%lld at %d",
        p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
    goto cleanup;
  }

cleanup:
  // Always close the blob; an earlier failure takes precedence over the
  // close result.
  brc = sqlite3_blob_close(blobChunksValidity);
  if (rc != SQLITE_OK)
    return rc;
  if (brc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "vec0 deletion error: Error commiting validity blob "
                   "transaction on %s.%s.%lld at %d",
                   p->schemaName, p->shadowChunksName, chunk_id,
                   validityOffset);
    return brc;
  }
  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
/**
 * @brief Overwrites one slot of a chunk's "rowids" blob with zero.
 *
 * @param p vec0 table being modified
 * @param chunk_id rowid of the chunk in the _chunks shadow table
 * @param chunk_offset slot index within the chunk
 * @return int SQLITE_OK on success; SQLITE_ERROR if the blob cannot be
 *         opened; otherwise the failing blob write/close code
 */
int vec0Update_Delete_ClearRowid(vec0_vtab *p, i64 chunk_id,
                                 u64 chunk_offset) {
  sqlite3_blob *rowidsBlob = NULL;
  i64 emptySlot = 0;
  int writeRc;
  int closeRc;

  if (sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
                        chunk_id, 1, &rowidsBlob) != SQLITE_OK) {
    vtab_set_error(&p->base, "could not open rowids blob for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return SQLITE_ERROR;
  }

  writeRc = sqlite3_blob_write(rowidsBlob, &emptySlot, sizeof(emptySlot),
                               chunk_offset * sizeof(i64));
  if (writeRc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "could not write to rowids blob for %s.%s.%lld at %llu",
                   p->schemaName, p->shadowChunksName, chunk_id, chunk_offset);
  }

  // The blob is closed regardless of the write outcome; a write failure wins
  // over whatever the close reports.
  closeRc = sqlite3_blob_close(rowidsBlob);
  if (writeRc != SQLITE_OK) {
    return writeRc;
  }
  if (closeRc == SQLITE_OK) {
    return SQLITE_OK;
  }

  vtab_set_error(&p->base,
                 "vec0 deletion error: Error commiting rowids blob "
                 "transaction on %s.%s.%lld at %llu",
                 p->schemaName, p->shadowChunksName, chunk_id, chunk_offset);
  return closeRc;
}
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
/**
 * @brief Zeroes one slot in the "vectors" blob of every FLAT vector column.
 *
 * For each vector column whose index type is VEC0_INDEX_TYPE_FLAT, opens the
 * column's vector chunk blob and writes a zero-filled vector at the slot.
 * Stops at the first failure.
 *
 * @param p vec0 table being modified
 * @param chunk_id rowid of the chunk row in the vector chunk shadow tables
 * @param chunk_offset slot index within the chunk
 * @return int SQLITE_OK on success; SQLITE_ERROR if a blob cannot be opened;
 *         SQLITE_NOMEM if the zero buffer cannot be allocated; otherwise the
 *         failing blob write/close code
 */
int vec0Update_Delete_ClearVectors(vec0_vtab *p, i64 chunk_id,
                                   u64 chunk_offset) {
  int col = 0;

  while (col < p->numVectorColumns) {
    // Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
    if (p->vector_columns[col].index_type == VEC0_INDEX_TYPE_FLAT) {
      sqlite3_blob *vectorsBlob = NULL;
      size_t vectorBytes = vector_column_byte_size(p->vector_columns[col]);
      int writeRc;
      int closeRc;
      void *zeros;

      if (sqlite3_blob_open(p->db, p->schemaName,
                            p->shadowVectorChunksNames[col], "vectors",
                            chunk_id, 1, &vectorsBlob) != SQLITE_OK) {
        vtab_set_error(&p->base,
                       "could not open vector blob for %s.%s.%lld column %d",
                       p->schemaName, p->shadowVectorChunksNames[col],
                       chunk_id, col);
        return SQLITE_ERROR;
      }

      zeros = sqlite3_malloc(vectorBytes);
      if (zeros == NULL) {
        sqlite3_blob_close(vectorsBlob);
        return SQLITE_NOMEM;
      }
      memset(zeros, 0, vectorBytes);

      writeRc = sqlite3_blob_write(vectorsBlob, zeros, vectorBytes,
                                   chunk_offset * vectorBytes);
      sqlite3_free(zeros);
      if (writeRc != SQLITE_OK) {
        vtab_set_error(
            &p->base,
            "could not write to vector blob for %s.%s.%lld at %llu column %d",
            p->schemaName, p->shadowVectorChunksNames[col], chunk_id,
            chunk_offset, col);
      }

      // Close before deciding what to return; a write failure takes
      // precedence over the close result.
      closeRc = sqlite3_blob_close(vectorsBlob);
      if (writeRc != SQLITE_OK) {
        return writeRc;
      }
      if (closeRc != SQLITE_OK) {
        vtab_set_error(&p->base,
                       "vec0 deletion error: Error commiting vector blob "
                       "transaction on %s.%s.%lld column %d",
                       p->schemaName, p->shadowVectorChunksNames[col],
                       chunk_id, col);
        return closeRc;
      }
    }
    col++;
  }

  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
int vec0Update_Delete_DeleteChunkIfEmpty ( vec0_vtab * p , i64 chunk_id ,
int * deleted ) {
int rc , brc ;
sqlite3_blob * blobValidity = NULL ;
* deleted = 0 ;
rc = sqlite3_blob_open ( p - > db , p - > schemaName , p - > shadowChunksName , " validity " ,
chunk_id , 0 , & blobValidity ) ;
if ( rc ! = SQLITE_OK ) {
vtab_set_error ( & p - > base ,
" could not open validity blob for chunk %lld " , chunk_id ) ;
2024-04-20 13:38:58 -07:00
return SQLITE_ERROR ;
}
2026-03-29 19:44:44 -07:00
int validitySize = sqlite3_blob_bytes ( blobValidity ) ;
unsigned char * validityBuf = sqlite3_malloc ( validitySize ) ;
if ( ! validityBuf ) {
sqlite3_blob_close ( blobValidity ) ;
return SQLITE_NOMEM ;
2024-04-20 13:38:58 -07:00
}
2026-03-29 19:44:44 -07:00
rc = sqlite3_blob_read ( blobValidity , validityBuf , validitySize , 0 ) ;
brc = sqlite3_blob_close ( blobValidity ) ;
if ( rc ! = SQLITE_OK ) {
sqlite3_free ( validityBuf ) ;
return rc ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
if ( brc ! = SQLITE_OK ) {
sqlite3_free ( validityBuf ) ;
return brc ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
int allZero = 1 ;
for ( int i = 0 ; i < validitySize ; i + + ) {
if ( validityBuf [ i ] ! = 0 ) {
allZero = 0 ;
break ;
}
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
sqlite3_free ( validityBuf ) ;
if ( ! allZero ) {
return SQLITE_OK ;
2024-07-23 08:59:34 -07:00
}
2026-03-29 19:44:44 -07:00
// All validity bits are zero — delete this chunk and its associated data
char * zSql ;
sqlite3_stmt * stmt ;
2024-04-20 13:38:58 -07:00
2026-03-29 19:44:44 -07:00
// Delete from _chunks
zSql = sqlite3_mprintf (
" DELETE FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE rowid = ? " ,
p - > schemaName , p - > tableName ) ;
if ( ! zSql )
return SQLITE_NOMEM ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , NULL ) ;
sqlite3_free ( zSql ) ;
if ( rc ! = SQLITE_OK )
return rc ;
sqlite3_bind_int64 ( stmt , 1 , chunk_id ) ;
rc = sqlite3_step ( stmt ) ;
sqlite3_finalize ( stmt ) ;
if ( rc ! = SQLITE_DONE )
return SQLITE_ERROR ;
2024-07-05 12:07:45 -07:00
2026-03-29 19:44:44 -07:00
// Delete from each _vector_chunksNN
for ( int i = 0 ; i < p - > numVectorColumns ; i + + ) {
2026-03-31 13:51:08 -07:00
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
2026-03-29 19:46:23 -07:00
if ( p - > vector_columns [ i ] . index_type ! = VEC0_INDEX_TYPE_FLAT )
2026-03-29 19:45:54 -07:00
continue ;
2026-03-29 19:44:44 -07:00
zSql = sqlite3_mprintf (
" DELETE FROM " VEC0_SHADOW_VECTOR_N_NAME " WHERE rowid = ? " ,
p - > schemaName , p - > tableName , i ) ;
if ( ! zSql )
return SQLITE_NOMEM ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , NULL ) ;
sqlite3_free ( zSql ) ;
if ( rc ! = SQLITE_OK )
return rc ;
sqlite3_bind_int64 ( stmt , 1 , chunk_id ) ;
rc = sqlite3_step ( stmt ) ;
sqlite3_finalize ( stmt ) ;
if ( rc ! = SQLITE_DONE )
return SQLITE_ERROR ;
2024-06-28 10:51:49 -07:00
}
2024-05-17 12:03:45 -07:00
2026-03-29 19:45:54 -07:00
# if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_delete_chunk ( p , chunk_id ) ;
if ( rc ! = SQLITE_OK )
return rc ;
# endif
2026-03-29 19:44:44 -07:00
// Delete from each _metadatachunksNN
for ( int i = 0 ; i < p - > numMetadataColumns ; i + + ) {
zSql = sqlite3_mprintf (
" DELETE FROM " VEC0_SHADOW_METADATA_N_NAME " WHERE rowid = ? " ,
p - > schemaName , p - > tableName , i ) ;
if ( ! zSql )
return SQLITE_NOMEM ;
rc = sqlite3_prepare_v2 ( p - > db , zSql , - 1 , & stmt , NULL ) ;
sqlite3_free ( zSql ) ;
if ( rc ! = SQLITE_OK )
return rc ;
sqlite3_bind_int64 ( stmt , 1 , chunk_id ) ;
rc = sqlite3_step ( stmt ) ;
sqlite3_finalize ( stmt ) ;
if ( rc ! = SQLITE_DONE )
return SQLITE_ERROR ;
}
2024-05-17 12:03:45 -07:00
2026-03-29 19:44:44 -07:00
// Invalidate cached stmtLatestChunk so it gets re-prepared on next insert
if ( p - > stmtLatestChunk ) {
sqlite3_finalize ( p - > stmtLatestChunk ) ;
p - > stmtLatestChunk = NULL ;
}
2024-05-17 12:03:45 -07:00
2026-03-29 19:44:44 -07:00
* deleted = 1 ;
return SQLITE_OK ;
}
2024-05-17 12:03:45 -07:00
2026-03-29 19:44:44 -07:00
/**
 * Deletes the row with the given `rowid` from the rowids shadow table.
 *
 * @return SQLITE_OK when the DELETE steps to SQLITE_DONE; SQLITE_NOMEM if the
 *         SQL text cannot be allocated; otherwise the code returned by
 *         sqlite3_prepare_v2() or sqlite3_step().
 */
int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
  sqlite3_stmt *deleteStmt = NULL;

  char *sql =
      sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
                      p->schemaName, p->tableName);
  if (sql == NULL) {
    return SQLITE_NOMEM;
  }

  int rc = sqlite3_prepare_v2(p->db, sql, -1, &deleteStmt, NULL);
  sqlite3_free(sql);
  if (rc == SQLITE_OK) {
    sqlite3_bind_int64(deleteStmt, 1, rowid);
    rc = sqlite3_step(deleteStmt);
    if (rc == SQLITE_DONE) {
      rc = SQLITE_OK;
    }
  }

  // deleteStmt is still NULL here if preparing failed.
  sqlite3_finalize(deleteStmt);
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * Deletes the row with the given `rowid` from the auxiliary shadow table.
 *
 * @return SQLITE_OK when the DELETE steps to SQLITE_DONE; SQLITE_NOMEM if the
 *         SQL text cannot be allocated; otherwise the code returned by
 *         sqlite3_prepare_v2() or sqlite3_step().
 */
int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
  sqlite3_stmt *deleteStmt = NULL;

  char *sql = sqlite3_mprintf(
      "DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?",
      p->schemaName, p->tableName);
  if (sql == NULL) {
    return SQLITE_NOMEM;
  }

  int rc = sqlite3_prepare_v2(p->db, sql, -1, &deleteStmt, NULL);
  sqlite3_free(sql);
  if (rc == SQLITE_OK) {
    sqlite3_bind_int64(deleteStmt, 1, rowid);
    rc = sqlite3_step(deleteStmt);
    if (rc == SQLITE_DONE) {
      rc = SQLITE_OK;
    }
  }

  // deleteStmt is still NULL here if preparing failed.
  sqlite3_finalize(deleteStmt);
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * Resets the value stored for one metadata column at slot `chunk_offset` of
 * chunk `chunk_id`, as part of deleting row `rowid`.
 *
 * The column's "data" blob is opened read/write and, depending on the column
 * kind:
 *  - BOOLEAN: the bit `chunk_offset % CHAR_BIT` of byte
 *    `chunk_offset / CHAR_BIT` is cleared;
 *  - INTEGER: an i64 zero is written at `chunk_offset * sizeof(i64)`;
 *  - FLOAT:   a double zero is written at `chunk_offset * sizeof(double)`;
 *  - TEXT:    the VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-byte slot is zeroed,
 *    and if the int read from the start of the slot (presumably the stored
 *    text length) exceeds VEC0_METADATA_TEXT_VIEW_DATA_LENGTH, the row `rowid`
 *    is also deleted from the column's text-data shadow table.
 *
 * @param p            the vec0 virtual table
 * @param metadata_idx index of the metadata column
 * @param rowid        rowid of the row being deleted
 * @param chunk_id     rowid of the chunk holding the value
 * @param chunk_offset slot index of the value inside the chunk
 * @return SQLITE_OK on success (never SQLITE_DONE); otherwise the first error
 *         encountered, or the sqlite3_blob_close() code if only the close
 *         failed.
 */
int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id,
                                    u64 chunk_offset) {
  int rc;
  int rc2;
  sqlite3_blob *blobValue;
  vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;

  rc = sqlite3_blob_open(p->db, p->schemaName,
                         p->shadowMetadataChunksNames[metadata_idx], "data",
                         chunk_id, 1, &blobValue);
  if (rc != SQLITE_OK) {
    return rc;
  }

  switch (kind) {
  case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
    // Read-modify-write the byte holding this slot's bit.
    u8 block;
    rc = sqlite3_blob_read(blobValue, &block, sizeof(u8),
                           (int)(chunk_offset / CHAR_BIT));
    if (rc != SQLITE_OK) {
      goto done;
    }

    block &= ~(1 << (chunk_offset % CHAR_BIT));
    rc = sqlite3_blob_write(blobValue, &block, sizeof(u8),
                            chunk_offset / CHAR_BIT);
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_INTEGER: {
    i64 v = 0;
    rc = sqlite3_blob_write(blobValue, &v, sizeof(v),
                            chunk_offset * sizeof(i64));
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_FLOAT: {
    double v = 0;
    rc = sqlite3_blob_write(blobValue, &v, sizeof(v),
                            chunk_offset * sizeof(double));
    break;
  }
  case VEC0_METADATA_COLUMN_KIND_TEXT: {
    // Capture the leading int of the slot before it is wiped; it decides
    // whether an overflow row must also be removed.
    int n;
    rc = sqlite3_blob_read(blobValue, &n, sizeof(int),
                           chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    if (rc != SQLITE_OK) {
      goto done;
    }

    u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
    memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    rc = sqlite3_blob_write(blobValue, &view, sizeof(view),
                            chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
    if (rc != SQLITE_OK) {
      goto done;
    }

    if (n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
      char *zSql = sqlite3_mprintf(
          "DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?",
          p->schemaName, p->tableName, metadata_idx);
      if (!zSql) {
        rc = SQLITE_NOMEM;
        goto done;
      }
      sqlite3_stmt *stmt;
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
      // The SQL text is no longer needed once the statement is prepared (or
      // preparing failed); free it on every path so it is not leaked.
      sqlite3_free(zSql);
      if (rc != SQLITE_OK) {
        goto done;
      }
      sqlite3_bind_int64(stmt, 1, rowid);
      rc = sqlite3_step(stmt);
      sqlite3_finalize(stmt);
      if (rc != SQLITE_DONE) {
        rc = SQLITE_ERROR;
        goto done;
      }
      // Fix for https://github.com/asg017/sqlite-vec/issues/274
      // sqlite3_step returns SQLITE_DONE (101) on DML success, but the
      // `done:` epilogue treats anything other than SQLITE_OK as an error.
      // Without this, SQLITE_DONE propagates up to vec0Update_Delete,
      // which aborts the DELETE scan and silently drops remaining rows.
      rc = SQLITE_OK;
    }
    break;
  }
  }

done:
  // Always close the blob; report the close result only if nothing else failed.
  rc2 = sqlite3_blob_close(blobValue);
  if (rc == SQLITE_OK) {
    return rc2;
  }
  return rc;
}
2026-03-29 19:44:44 -07:00
/**
 * Handles a DELETE on a vec0 table for the row identified by `idValue`.
 *
 * Order of operations:
 *  1. Resolve the rowid (through vec0_rowid_from_id() for text primary keys).
 *  2. If DiskANN is compiled in, remove the row from every DiskANN-indexed
 *     vector column.
 *  3. Unless all vector columns are DiskANN: look up the row's chunk position,
 *     clear its validity bit, zero its rowid slot, zero its vector data and,
 *     if rescore is compiled in, run the rescore delete hook.
 *  4. Delete the row from the rowids shadow table.
 *  5. Delete the auxiliary row, if the table has auxiliary columns.
 *  6. Unless all vector columns are DiskANN: clear each metadata column's
 *     value and drop the chunk if it became empty.
 *  7. If IVF is compiled in, remove the row from every IVF-indexed column.
 *
 * @return SQLITE_OK on success, otherwise the first failing step's code.
 */
int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
  vec0_vtab *p = (vec0_vtab *)pVTab;
  int rc;
  i64 rowid;
  i64 chunk_id = 0;
  i64 chunk_offset = 0;

  if (!p->pkIsText) {
    rowid = sqlite3_value_int64(idValue);
  } else {
    rc = vec0_rowid_from_id(p, idValue, &rowid);
    if (rc != SQLITE_OK) {
      return rc;
    }
  }

#if SQLITE_VEC_ENABLE_DISKANN
  // DiskANN graph deletion for indexed columns
  for (int col = 0; col < p->numVectorColumns; col++) {
    if (p->vector_columns[col].index_type == VEC0_INDEX_TYPE_DISKANN) {
      rc = diskann_delete(p, col, rowid);
      if (rc != SQLITE_OK) {
        return rc;
      }
    }
  }
#endif

  if (!vec0_all_columns_diskann(p)) {
    // Locate the row: chunk_id and chunk_offset come from _rowids
    rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
    if (rc != SQLITE_OK) {
      return rc;
    }

    // Mark the slot as unused in the chunk's validity bitmap
    rc = vec0Update_Delete_ClearValidity(p, chunk_id, chunk_offset);
    if (rc != SQLITE_OK) {
      return rc;
    }

    // Zero out the rowid in chunks.rowids
    rc = vec0Update_Delete_ClearRowid(p, chunk_id, chunk_offset);
    if (rc != SQLITE_OK) {
      return rc;
    }

    // Zero out any data in the vector chunks tables
    rc = vec0Update_Delete_ClearVectors(p, chunk_id, chunk_offset);
    if (rc != SQLITE_OK) {
      return rc;
    }

#if SQLITE_VEC_ENABLE_RESCORE
    // Zero out quantized data in rescore chunk tables, delete from rescore
    // vectors
    rc = rescore_on_delete(p, chunk_id, chunk_offset, rowid);
    if (rc != SQLITE_OK) {
      return rc;
    }
#endif
  }

  // Delete from the _rowids table
  rc = vec0Update_Delete_DeleteRowids(p, rowid);
  if (rc != SQLITE_OK) {
    return rc;
  }

  // Delete any auxiliary row
  if (p->numAuxiliaryColumns > 0) {
    rc = vec0Update_Delete_DeleteAux(p, rowid);
    if (rc != SQLITE_OK) {
      return rc;
    }
  }

  // Metadata cleanup and chunk reclamation only apply to chunk-based storage
  if (!vec0_all_columns_diskann(p)) {
    for (int col = 0; col < p->numMetadataColumns; col++) {
      rc = vec0Update_Delete_ClearMetadata(p, col, rowid, chunk_id,
                                           chunk_offset);
      if (rc != SQLITE_OK) {
        return rc;
      }
    }

    // Reclaim the chunk if it is now fully empty
    {
      int chunkDeleted;
      rc = vec0Update_Delete_DeleteChunkIfEmpty(p, chunk_id, &chunkDeleted);
      if (rc != SQLITE_OK) {
        return rc;
      }
    }
  }

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
  // Delete from the IVF index
  for (int col = 0; col < p->numVectorColumns; col++) {
    if (p->vector_columns[col].index_type == VEC0_INDEX_TYPE_IVF) {
      rc = ivf_delete(p, col, rowid);
      if (rc != SQLITE_OK) {
        return rc;
      }
    }
  }
#endif

  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
/**
 * Sets auxiliary column `auxiliary_column_idx` of row `rowid` to `value` by
 * running an UPDATE against the auxiliary shadow table.
 *
 * @param p                    the vec0 virtual table
 * @param auxiliary_column_idx index used to form the `valueNN` column name
 * @param value                new value, bound as-is
 * @param rowid                rowid of the row to update
 * @return SQLITE_OK on success; SQLITE_NOMEM if the SQL text cannot be
 *         allocated; the sqlite3_prepare_v2() code if preparing fails;
 *         SQLITE_ERROR if the statement does not step to SQLITE_DONE.
 */
int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value *value, i64 rowid) {
  int rc;
  sqlite3_stmt *stmt;
  char *zSql = sqlite3_mprintf(
      "UPDATE " VEC0_SHADOW_AUXILIARY_NAME " SET value%02d = ? WHERE rowid = ?",
      p->schemaName, p->tableName, auxiliary_column_idx);
  if (!zSql) {
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  // The SQL text is not needed after preparing; free it on every path so it is
  // not leaked.
  sqlite3_free(zSql);
  if (rc != SQLITE_OK) {
    return rc;
  }
  sqlite3_bind_value(stmt, 1, value);
  sqlite3_bind_int64(stmt, 2, rowid);
  rc = sqlite3_step(stmt);
  sqlite3_finalize(stmt);
  if (rc != SQLITE_DONE) {
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
/**
 * Replaces the stored vector of vector column `i` for the row located at
 * (`chunk_id`, `chunk_offset`) with the vector in `valueVector`.
 *
 * The new value is parsed with vector_from_value() and rejected with
 * SQLITE_ERROR (and a vtab error message) if it is invalid, has a different
 * element type than the column, or has a different number of dimensions.
 *
 * When rescore is compiled in and the column is a RESCORE column, the
 * quantized vector is rewritten in the rescore chunks blob and the float
 * vector is updated in the rescore vectors table (keyed by `rowid`).
 * Otherwise the vector is written into the column's "vectors" chunk blob.
 *
 * @param p            the vec0 virtual table
 * @param chunk_id     rowid of the chunk holding the row
 * @param chunk_offset slot index of the row inside the chunk
 * @param i            index of the vector column
 * @param valueVector  the new vector value
 * @param rowid        rowid of the row (only used by the rescore path)
 * @return SQLITE_OK on success, otherwise an SQLite error code.
 */
int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset,
                                  int i, sqlite3_value *valueVector, i64 rowid) {
  int rc;
#if !SQLITE_VEC_ENABLE_RESCORE
  UNUSED_PARAMETER(rowid);
#endif

  sqlite3_blob *blobVectors = NULL;

  // Initialised so the %z below never sees an indeterminate pointer.
  char *pzError = NULL;
  size_t dimensions;
  enum VectorElementType elementType;
  void *vector;
  vector_cleanup cleanup = vector_cleanup_noop;
  // https://github.com/asg017/sqlite-vec/issues/53
  rc = vector_from_value(valueVector, &vector, &dimensions, &elementType,
                         &cleanup, &pzError);
  if (rc != SQLITE_OK) {
    // IMP: V15203_32042
    vtab_set_error(
        &p->base, "Updated vector for the \"%.*s\" column is invalid: %z",
        p->vector_columns[i].name_length, p->vector_columns[i].name, pzError);
    rc = SQLITE_ERROR;
    goto cleanup;
  }
  if (elementType != p->vector_columns[i].element_type) {
    // IMP: V03643_20481
    vtab_set_error(
        &p->base,
        "Updated vector for the \"%.*s\" column is expected to be of type "
        "%s, but a %s vector was provided.",
        p->vector_columns[i].name_length, p->vector_columns[i].name,
        vector_subtype_name(p->vector_columns[i].element_type),
        vector_subtype_name(elementType));
    rc = SQLITE_ERROR;
    goto cleanup;
  }
  if (dimensions != p->vector_columns[i].dimensions) {
    // IMP: V25739_09810
    // `dimensions` is a size_t; passing it for a %d conversion is a varargs
    // type mismatch. Format both counts as 64-bit integers with explicit
    // casts so the argument types match the conversions.
    vtab_set_error(
        &p->base,
        "Dimension mismatch for new updated vector for the \"%.*s\" column. "
        "Expected %lld dimensions but received %lld.",
        p->vector_columns[i].name_length, p->vector_columns[i].name,
        (i64)p->vector_columns[i].dimensions, (i64)dimensions);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

#if SQLITE_VEC_ENABLE_RESCORE
  if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_RESCORE) {
    // For rescore columns, update _rescore_vectors and _rescore_chunks
    struct VectorColumnDefinition *col = &p->vector_columns[i];
    size_t qsize = rescore_quantized_byte_size(col);
    size_t fsize = vector_column_byte_size(*col);
    // 1. Update quantized chunk
    {
      void *qbuf = sqlite3_malloc(qsize);
      if (!qbuf) {
        rc = SQLITE_NOMEM;
        goto cleanup;
      }
      switch (col->rescore.quantizer_type) {
      case VEC0_RESCORE_QUANTIZER_BIT:
        rescore_quantize_float_to_bit((const float *)vector, (uint8_t *)qbuf,
                                      col->dimensions);
        break;
      case VEC0_RESCORE_QUANTIZER_INT8:
        rescore_quantize_float_to_int8((const float *)vector, (int8_t *)qbuf,
                                       col->dimensions);
        break;
      }
      sqlite3_blob *blobQ = NULL;
      rc = sqlite3_blob_open(p->db, p->schemaName,
                             p->shadowRescoreChunksNames[i], "vectors",
                             chunk_id, 1, &blobQ);
      if (rc != SQLITE_OK) {
        sqlite3_free(qbuf);
        goto cleanup;
      }
      rc = sqlite3_blob_write(blobQ, qbuf, qsize, chunk_offset * qsize);
      sqlite3_free(qbuf);
      // Close unconditionally; a write error takes precedence over a close
      // error.
      int brc2 = sqlite3_blob_close(blobQ);
      if (rc != SQLITE_OK) {
        goto cleanup;
      }
      if (brc2 != SQLITE_OK) {
        rc = brc2;
        goto cleanup;
      }
    }
    // 2. Update float vector in _rescore_vectors (keyed by user rowid)
    {
      char *zSql = sqlite3_mprintf(
          "UPDATE \"%w\".\"%w\" SET vector = ? WHERE rowid = ?",
          p->schemaName, p->shadowRescoreVectorsNames[i]);
      if (!zSql) {
        rc = SQLITE_NOMEM;
        goto cleanup;
      }
      sqlite3_stmt *stmtUp;
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmtUp, NULL);
      sqlite3_free(zSql);
      if (rc != SQLITE_OK) {
        goto cleanup;
      }
      sqlite3_bind_blob(stmtUp, 1, vector, fsize, SQLITE_TRANSIENT);
      sqlite3_bind_int64(stmtUp, 2, rowid);
      rc = sqlite3_step(stmtUp);
      sqlite3_finalize(stmtUp);
      if (rc != SQLITE_DONE) {
        rc = SQLITE_ERROR;
        goto cleanup;
      }
    }
    rc = SQLITE_OK;
    goto cleanup;
  }
#endif

  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
                         "vectors", chunk_id, 1, &blobVectors);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
    goto cleanup;
  }
  rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector,
                                        p->vector_columns[i].dimensions,
                                        p->vector_columns[i].element_type);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
    goto cleanup;
  }

cleanup:
  // Release the parsed vector, then close the chunk blob. blobVectors is still
  // NULL on the validation-failure and rescore paths.
  cleanup(vector);
  int brc = sqlite3_blob_close(blobVectors);
  if (rc != SQLITE_OK) {
    return rc;
  }
  if (brc != SQLITE_OK) {
    vtab_set_error(
        &p->base,
        "Could not commit blob transaction for vectors blob for %s.%s.%lld",
        p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
    return brc;
  }
  return SQLITE_OK;
}
/**
 * Handles an UPDATE on a vec0 table.
 *
 * `argv[0]` is the existing row id and `argv[1]` the new one; user column
 * values start at `argv[2 + VEC0_COLUMN_USERN_START]`.
 *
 * Steps:
 *  - For text primary keys, reject any change of the key value, then resolve
 *    the rowid; for integer keys the rowid is read straight from `argv[0]`.
 *  - Look up the row's chunk position.
 *  - Reject changes to partition key columns.
 *  - Apply changed auxiliary columns, then changed metadata columns.
 *  - Apply non-NULL vector columns, rejecting DiskANN (and, when compiled in,
 *    IVF) indexed columns.
 *
 * @return SQLITE_OK on success, otherwise an SQLite error code.
 */
int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
  UNUSED_PARAMETER(argc);
  vec0_vtab *p = (vec0_vtab *)pVTab;
  int rc;
  i64 chunk_id;
  i64 chunk_offset;
  i64 rowid;

  if (!p->pkIsText) {
    rowid = sqlite3_value_int64(argv[0]);
  } else {
    const char *oldKey = (const char *)sqlite3_value_text(argv[0]);
    const char *newKey = (const char *)sqlite3_value_text(argv[1]);
    // IMP: V08886_25725
    int sameLength =
        sqlite3_value_bytes(argv[0]) == sqlite3_value_bytes(argv[1]);
    if (!sameLength ||
        strncmp(oldKey, newKey, sqlite3_value_bytes(argv[0])) != 0) {
      vtab_set_error(pVTab,
                     "UPDATEs on vec0 primary key values are not allowed.");
      return SQLITE_ERROR;
    }
    rc = vec0_rowid_from_id(p, argv[0], &rowid);
    if (rc != SQLITE_OK) {
      return rc;
    }
  }

  // 1) get chunk_id and chunk_offset from _rowids
  rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
  if (rc != SQLITE_OK) {
    return rc;
  }

  // 2) partition key values may not change
  for (int col = 0; col < vec0_num_defined_user_columns(p); col++) {
    if (p->user_column_kinds[col] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION &&
        !sqlite3_value_nochange(argv[2 + VEC0_COLUMN_USERN_START + col])) {
      vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. ");
      return SQLITE_ERROR;
    }
  }

  // 3) auxiliary column updates
  for (int col = 0; col < vec0_num_defined_user_columns(p); col++) {
    if (p->user_column_kinds[col] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
      continue;
    }
    int auxIdx = p->user_column_idxs[col];
    sqlite3_value *newValue = argv[2 + VEC0_COLUMN_USERN_START + col];
    if (!sqlite3_value_nochange(newValue)) {
      rc = vec0Update_UpdateAuxColumn(p, auxIdx, newValue, rowid);
      if (rc != SQLITE_OK) {
        return SQLITE_ERROR;
      }
    }
  }

  // 4) metadata column updates
  for (int col = 0; col < vec0_num_defined_user_columns(p); col++) {
    if (p->user_column_kinds[col] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
      continue;
    }
    int metadataIdx = p->user_column_idxs[col];
    sqlite3_value *newValue = argv[2 + VEC0_COLUMN_USERN_START + col];
    if (!sqlite3_value_nochange(newValue)) {
      rc = vec0_write_metadata_value(p, metadataIdx, rowid, chunk_id,
                                     chunk_offset, newValue, 1);
      if (rc != SQLITE_OK) {
        return rc;
      }
    }
  }

  // 5) vector column updates
  for (int col = 0; col < vec0_num_defined_user_columns(p); col++) {
    if (p->user_column_kinds[col] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
      continue;
    }
    int vectorIdx = p->user_column_idxs[col];
    sqlite3_value *newVector = argv[2 + VEC0_COLUMN_USERN_START + col];

    // vec0Column checks sqlite3_vtab_nochange() on vector columns and returns
    // NULL when the column is not being changed. As a consequence vector
    // columns can never be NULLABLE: a genuinely NULL update cannot be told
    // apart from "no change", so `UPDATE v SET X = NULL` cannot be detected
    // and rejected. A custom result subtype for "empty" would be nicer, but
    // subtypes don't appear to survive xColumn -> xUpdate (always 0). So NULL
    // means "unchanged" here, and the docs warn against SET X = NULL.
    if (sqlite3_value_type(newVector) == SQLITE_NULL) {
      continue;
    }

    // Block vector UPDATE for index types that don't implement it —
    // the DiskANN graph / IVF lists would become stale.
    {
      enum Vec0IndexType indexType = p->vector_columns[vectorIdx].index_type;
      const char *indexName = NULL;
      if (indexType == VEC0_INDEX_TYPE_DISKANN) {
        indexName = "DiskANN";
      }
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
      else if (indexType == VEC0_INDEX_TYPE_IVF) {
        indexName = "IVF";
      }
#endif
      if (indexName != NULL) {
        vtab_set_error(
            &p->base,
            "UPDATE on vector column \"%.*s\" is not supported for %s indexes.",
            p->vector_columns[vectorIdx].name_length,
            p->vector_columns[vectorIdx].name, indexName);
        return SQLITE_ERROR;
      }
    }

    rc = vec0Update_UpdateVectorColumn(p, chunk_id, chunk_offset, vectorIdx,
                                       newVector, rowid);
    if (rc != SQLITE_OK) {
      return SQLITE_ERROR;
    }
  }

  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
/**
 * xUpdate entry point for vec0: dispatches to the DELETE, INSERT or UPDATE
 * handler based on `argc` and whether `argv[0]` is NULL.
 *
 *  - argc == 1, argv[0] not NULL -> DELETE
 *  - argc  > 1, argv[0] NULL     -> INSERT (or a command insert, see below)
 *  - argc  > 1, argv[0] not NULL -> UPDATE
 *  - anything else               -> SQLITE_ERROR with a vtab error message
 *
 * When IVF or DiskANN support is compiled in, an INSERT whose id column is
 * TEXT is first offered to the command handlers; only when they return
 * SQLITE_EMPTY does it proceed as a normal insert.
 */
static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
                      sqlite_int64 *pRowid) {
  if (argc == 1) {
    // DELETE operation
    if (sqlite3_value_type(argv[0]) != SQLITE_NULL) {
      return vec0Update_Delete(pVTab, argv[0]);
    }
  } else if (argc > 1) {
    // UPDATE operation
    if (sqlite3_value_type(argv[0]) != SQLITE_NULL) {
      return vec0Update_Update(pVTab, argc, argv);
    }

    // INSERT operation
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE || SQLITE_VEC_ENABLE_DISKANN
    // Check for command inserts: INSERT INTO t(rowid) VALUES ('command-string')
    // The id column holds the command string.
    sqlite3_value *idVal = argv[2 + VEC0_COLUMN_ID];
    if (sqlite3_value_type(idVal) == SQLITE_TEXT) {
      const char *cmd = (const char *)sqlite3_value_text(idVal);
      vec0_vtab *p = (vec0_vtab *)pVTab;
      int cmdRc = SQLITE_EMPTY;
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
      cmdRc = ivf_handle_command(p, cmd, argc, argv);
#endif
#if SQLITE_VEC_ENABLE_DISKANN
      if (cmdRc == SQLITE_EMPTY) {
        cmdRc = diskann_handle_command(p, cmd);
      }
#endif
      if (cmdRc != SQLITE_EMPTY) {
        return cmdRc; // handled (or error)
      }
      // SQLITE_EMPTY means not a recognized command — fall through to normal
      // insert
    }
#endif
    return vec0Update_Insert(pVTab, argc, argv, pRowid);
  }

  vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0.");
  return SQLITE_ERROR;
}
2026-03-29 19:44:44 -07:00
/**
 * xShadowName callback for vec0: reports whether `zName` is one of the
 * shadow-table name suffixes listed below.
 *
 * The comparison is case-insensitive (sqlite3_stricmp).
 *
 * @return 1 if `zName` matches a listed suffix, 0 otherwise.
 */
static int vec0ShadowName(const char *zName) {
  static const char *kShadowSuffixes[] = {
      "rowids",
      "chunks",
      "auxiliary",
      "info",
      // One entry per possible metadata column, up to
      // VEC0_MAX_METADATA_COLUMNS.
      // TODO: generate these instead of listing them by hand.
      "metadatachunks00",
      "metadatachunks01",
      "metadatachunks02",
      "metadatachunks03",
      "metadatachunks04",
      "metadatachunks05",
      "metadatachunks06",
      "metadatachunks07",
      "metadatachunks08",
      "metadatachunks09",
      "metadatachunks10",
      "metadatachunks11",
      "metadatachunks12",
      "metadatachunks13",
      "metadatachunks14",
      "metadatachunks15",
      // Metadata text tables, same 00-15 numbering.
      "metadatatext00",
      "metadatatext01",
      "metadatatext02",
      "metadatatext03",
      "metadatatext04",
      "metadatatext05",
      "metadatatext06",
      "metadatatext07",
      "metadatatext08",
      "metadatatext09",
      "metadatatext10",
      "metadatatext11",
      "metadatatext12",
      "metadatatext13",
      "metadatatext14",
      "metadatatext15",
  };
  const size_t nSuffixes = sizeof(kShadowSuffixes) / sizeof(kShadowSuffixes[0]);

  size_t idx = 0;
  while (idx < nSuffixes) {
    if (sqlite3_stricmp(zName, kShadowSuffixes[idx]) == 0) {
      return 1;
    }
    idx++;
  }
  return 0;
}
2026-03-29 19:44:44 -07:00
/** xBegin callback for vec0: does nothing and always returns SQLITE_OK. */
static int vec0Begin(sqlite3_vtab *pVTab) {
  UNUSED_PARAMETER(pVTab);

  return SQLITE_OK;
}
/**
 * xSync callback for vec0.
 *
 * Finalizes every prepared statement cached on the vtab and resets the cached
 * pointer to NULL, so a later use has to prepare the statement again.
 * NOTE(review): presumably done so no cached statement stays open across the
 * commit - confirm against the code that lazily prepares these statements.
 *
 * @param pVTab  The vec0 virtual table (a vec0_vtab).
 * @return SQLITE_OK, always.
 */
static int vec0Sync(sqlite3_vtab *pVTab) {
  vec0_vtab *vtab = (vec0_vtab *)pVTab;

  // Slots are visited in the same order the statements were released before.
  sqlite3_stmt **cachedStmts[] = {
      &vtab->stmtLatestChunk,
      &vtab->stmtRowidsInsertRowid,
      &vtab->stmtRowidsInsertId,
      &vtab->stmtRowidsUpdatePosition,
      &vtab->stmtRowidsGetChunkPosition,
  };

  for (size_t i = 0; i < sizeof(cachedStmts) / sizeof(cachedStmts[0]); i++) {
    sqlite3_stmt **slot = cachedStmts[i];
    if (*slot) {
      sqlite3_finalize(*slot);
      *slot = NULL;
    }
  }
  return SQLITE_OK;
}
/**
 * xCommit callback for vec0. Nothing is done at commit time; the hook always
 * reports success.
 *
 * @param pVTab  The virtual table (unused).
 * @return SQLITE_OK.
 */
static int vec0Commit(sqlite3_vtab *pVTab) {
  (void)pVTab;
  return SQLITE_OK;
}
/**
 * xRollback callback for vec0. Nothing is done at rollback time; the hook
 * always reports success.
 *
 * @param pVTab  The virtual table (unused).
 * @return SQLITE_OK.
 */
static int vec0Rollback(sqlite3_vtab *pVTab) {
  (void)pVTab;
  return SQLITE_OK;
}
2026-03-29 19:44:44 -07:00
static sqlite3_module vec0Module = {
/* iVersion */ 3 ,
/* xCreate */ vec0Create ,
/* xConnect */ vec0Connect ,
/* xBestIndex */ vec0BestIndex ,
/* xDisconnect */ vec0Disconnect ,
/* xDestroy */ vec0Destroy ,
/* xOpen */ vec0Open ,
/* xClose */ vec0Close ,
/* xFilter */ vec0Filter ,
/* xNext */ vec0Next ,
/* xEof */ vec0Eof ,
/* xColumn */ vec0Column ,
/* xRowid */ vec0Rowid ,
/* xUpdate */ vec0Update ,
/* xBegin */ vec0Begin ,
/* xSync */ vec0Sync ,
/* xCommit */ vec0Commit ,
/* xRollback */ vec0Rollback ,
/* xFindFunction */ 0 ,
/* xRename */ 0 , // https://github.com/asg017/sqlite-vec/issues/43
/* xSavepoint */ 0 ,
/* xRelease */ 0 ,
/* xRollbackTo */ 0 ,
/* xShadowName */ vec0ShadowName ,
2024-07-05 12:07:45 -07:00
# if SQLITE_VERSION_NUMBER >= 3044000
2026-03-29 19:44:44 -07:00
/* xIntegrity */ 0 , // https://github.com/asg017/sqlite-vec/issues/44
2024-05-17 12:03:45 -07:00
# endif
2024-07-05 12:07:45 -07:00
} ;
# pragma endregion
2024-05-17 12:03:45 -07:00
2026-03-29 19:44:44 -07:00
2024-04-20 13:38:58 -07:00
// Per-feature fragments of the "Build flags" debug text. Each macro expands
// to the feature's name when that feature is compiled in and to an empty
// string otherwise.

// SIMD back-ends: enabled by merely defining the flag.
#if defined(SQLITE_VEC_ENABLE_AVX)
#define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
#else
#define SQLITE_VEC_DEBUG_BUILD_AVX ""
#endif

#if defined(SQLITE_VEC_ENABLE_NEON)
#define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
#else
#define SQLITE_VEC_DEBUG_BUILD_NEON ""
#endif

// Index features: enabled when the flag evaluates to non-zero.
#if SQLITE_VEC_ENABLE_RESCORE
#define SQLITE_VEC_DEBUG_BUILD_RESCORE "rescore"
#else
#define SQLITE_VEC_DEBUG_BUILD_RESCORE ""
#endif

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
#define SQLITE_VEC_DEBUG_BUILD_IVF "ivf"
#else
#define SQLITE_VEC_DEBUG_BUILD_IVF ""
#endif

#if SQLITE_VEC_ENABLE_DISKANN
#define SQLITE_VEC_DEBUG_BUILD_DISKANN "diskann"
#else
#define SQLITE_VEC_DEBUG_BUILD_DISKANN ""
#endif
2024-04-20 13:38:58 -07:00
// Space-separated list of the build-flag fragments, in a fixed order:
// avx, neon, rescore, ivf, diskann. A disabled feature contributes an empty
// string, so its separating space is still present in the result.
#define SQLITE_VEC_DEBUG_BUILD                                                 \
  SQLITE_VEC_DEBUG_BUILD_AVX " "                                               \
  SQLITE_VEC_DEBUG_BUILD_NEON " "                                              \
  SQLITE_VEC_DEBUG_BUILD_RESCORE " "                                           \
  SQLITE_VEC_DEBUG_BUILD_IVF " "                                               \
  SQLITE_VEC_DEBUG_BUILD_DISKANN
2024-04-20 13:38:58 -07:00
// Multi-line build report: version, build date, source commit and build
// flags, one "Label: value" pair per line with no trailing newline.
#define SQLITE_VEC_DEBUG_STRING                                                \
  "Version: " SQLITE_VEC_VERSION                                               \
  "\nDate: " SQLITE_VEC_DATE                                                   \
  "\nCommit: " SQLITE_VEC_SOURCE                                               \
  "\nBuild flags: " SQLITE_VEC_DEBUG_BUILD
2024-08-09 10:44:39 -07:00
SQLITE_VEC_API int sqlite3_vec_init ( sqlite3 * db , char * * pzErrMsg ,
2024-08-10 23:33:28 -07:00
const sqlite3_api_routines * pApi ) {
2024-09-20 13:17:57 -07:00
# ifndef SQLITE_CORE
2024-04-20 13:38:58 -07:00
SQLITE_EXTENSION_INIT2 ( pApi ) ;
2024-09-20 13:17:57 -07:00
# endif
2024-04-20 13:38:58 -07:00
int rc = SQLITE_OK ;
2024-07-05 12:07:45 -07:00
# define DEFAULT_FLAGS (SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC)
rc = sqlite3_create_function_v2 ( db , " vec_version " , 0 , DEFAULT_FLAGS ,
SQLITE_VEC_VERSION , _static_text_func , NULL ,
NULL , NULL ) ;
if ( rc ! = SQLITE_OK ) {
2024-06-28 22:03:54 -07:00
return rc ;
}
2024-07-05 12:07:45 -07:00
rc = sqlite3_create_function_v2 ( db , " vec_debug " , 0 , DEFAULT_FLAGS ,
SQLITE_VEC_DEBUG_STRING , _static_text_func ,
NULL , NULL , NULL ) ;
if ( rc ! = SQLITE_OK ) {
2024-06-28 22:03:54 -07:00
return rc ;
}
2024-06-28 21:38:50 -07:00
static struct {
2024-06-28 20:56:51 -07:00
const char * zFName ;
2024-04-20 13:38:58 -07:00
void ( * xFunc ) ( sqlite3_context * , int , sqlite3_value * * ) ;
int nArg ;
int flags ;
} aFunc [ ] = {
// clang-format off
2024-06-28 22:03:54 -07:00
//{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION },
//{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING },
{ " vec_distance_l2 " , vec_distance_l2 , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE , } ,
2024-07-23 14:04:17 -07:00
{ " vec_distance_l1 " , vec_distance_l1 , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE , } ,
2024-06-28 22:03:54 -07:00
{ " vec_distance_hamming " , vec_distance_hamming , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE , } ,
{ " vec_distance_cosine " , vec_distance_cosine , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE , } ,
{ " vec_length " , vec_length , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE , } ,
2024-07-22 21:24:44 -07:00
{ " vec_type " , vec_type , 1 , DEFAULT_FLAGS , } ,
2024-06-28 22:03:54 -07:00
{ " vec_to_json " , vec_to_json , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_add " , vec_add , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_sub " , vec_sub , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_slice " , vec_slice , 3 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_normalize " , vec_normalize , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_f32 " , vec_f32 , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_bit " , vec_bit , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
{ " vec_int8 " , vec_int8 , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
2024-07-22 21:24:44 -07:00
{ " vec_quantize_int8 " , vec_quantize_int8 , 2 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
2024-06-28 22:03:54 -07:00
{ " vec_quantize_binary " , vec_quantize_binary , 1 , DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE , } ,
2024-04-20 13:38:58 -07:00
// clang-format on
} ;
2024-06-28 21:38:50 -07:00
static struct {
2024-04-20 13:38:58 -07:00
char * name ;
const sqlite3_module * module ;
2024-06-13 16:32:57 -07:00
void * p ;
void ( * xDestroy ) ( void * ) ;
2024-04-20 13:38:58 -07:00
} aMod [ ] = {
// clang-format off
2024-05-17 12:03:45 -07:00
{ " vec0 " , & vec0Module , NULL , NULL } ,
{ " vec_each " , & vec_eachModule , NULL , NULL } ,
2024-04-20 13:38:58 -07:00
// clang-format on
} ;
2024-07-05 12:07:45 -07:00
for ( unsigned long i = 0 ; i < countof ( aFunc ) & & rc = = SQLITE_OK ; i + + ) {
2024-04-20 13:38:58 -07:00
rc = sqlite3_create_function_v2 ( db , aFunc [ i ] . zFName , aFunc [ i ] . nArg ,
2024-07-05 12:07:45 -07:00
aFunc [ i ] . flags , NULL , aFunc [ i ] . xFunc , NULL ,
NULL , NULL ) ;
2024-04-20 13:38:58 -07:00
if ( rc ! = SQLITE_OK ) {
* pzErrMsg = sqlite3_mprintf ( " Error creating function %s: %s " ,
aFunc [ i ] . zFName , sqlite3_errmsg ( db ) ) ;
return rc ;
}
}
for ( unsigned long i = 0 ; i < countof ( aMod ) & & rc = = SQLITE_OK ; i + + ) {
rc = sqlite3_create_module_v2 ( db , aMod [ i ] . name , aMod [ i ] . module , NULL , NULL ) ;
if ( rc ! = SQLITE_OK ) {
* pzErrMsg = sqlite3_mprintf ( " Error creating module %s: %s " , aMod [ i ] . name ,
sqlite3_errmsg ( db ) ) ;
return rc ;
}
}
2024-07-31 12:56:09 -07:00
2024-04-20 13:38:58 -07:00
return SQLITE_OK ;
}
2024-07-31 12:56:09 -07:00