sqlite-vec/sqlite-vec.c

10438 lines
327 KiB
C
Raw Normal View History

2024-04-20 17:02:19 -07:00
#include "sqlite-vec.h"
2024-08-10 23:33:28 -07:00
2024-04-20 13:38:58 -07:00
#include <assert.h>
#include <errno.h>
2024-06-13 16:32:57 -07:00
#include <float.h>
2024-04-20 17:02:19 -07:00
#include <inttypes.h>
2024-04-20 13:38:58 -07:00
#include <limits.h>
#include <math.h>
#include <stdbool.h>
2024-04-20 17:02:19 -07:00
#include <stdint.h>
2024-04-20 13:38:58 -07:00
#include <stdlib.h>
#include <string.h>
#ifdef SQLITE_VEC_DEBUG
2024-08-10 23:33:28 -07:00
#include <stdio.h>
#endif
#ifndef SQLITE_CORE
2024-04-20 13:38:58 -07:00
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#else
2024-09-20 13:17:57 -07:00
#include "sqlite3.h"
#endif
#ifndef SQLITE_VEC_ENABLE_DISKANN
#define SQLITE_VEC_ENABLE_DISKANN 1
#endif
2024-04-20 17:02:19 -07:00
/* Short aliases for the fixed-width and size types used in this file. */

/* signed integers */
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef sqlite3_int64 i64;

/* unsigned integers */
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;

/* floating point and sizes */
typedef float f32;
typedef size_t usize;
2024-04-20 13:38:58 -07:00
#ifndef UNUSED_PARAMETER
#define UNUSED_PARAMETER(X) (void)(X)
#endif
2024-08-10 23:33:28 -07:00
// sqlite3_vtab_in() was added in SQLite version 3.38 (2022-02-22)
// https://www.sqlite.org/changes.html#version_3_38_0
#if SQLITE_VERSION_NUMBER >= 3038000
#define COMPILER_SUPPORTS_VTAB_IN 1
#endif
#ifndef SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
#define SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE 0
#endif
#ifndef SQLITE_SUBTYPE
#define SQLITE_SUBTYPE 0x000100000
#endif
#ifndef SQLITE_RESULT_SUBTYPE
#define SQLITE_RESULT_SUBTYPE 0x001000000
#endif
#ifndef SQLITE_INDEX_CONSTRAINT_LIMIT
2024-08-10 23:33:28 -07:00
#define SQLITE_INDEX_CONSTRAINT_LIMIT 73
#endif
#ifndef SQLITE_INDEX_CONSTRAINT_OFFSET
#define SQLITE_INDEX_CONSTRAINT_OFFSET 74
#endif
2024-04-20 13:38:58 -07:00
#define countof(x) (sizeof(x) / sizeof((x)[0]))
2024-07-05 12:07:45 -07:00
#define min(a, b) (((a) <= (b)) ? (a) : (b))
2024-04-20 13:38:58 -07:00
#ifndef SQLITE_VEC_ENABLE_RESCORE
#define SQLITE_VEC_ENABLE_RESCORE 1
#endif
2024-04-20 13:38:58 -07:00
/**
 * Element types a vector can hold. vector_subtype_name() maps each of these
 * values to its textual name.
 */
enum VectorElementType {
  SQLITE_VEC_ELEMENT_TYPE_FLOAT32 = 223,
  SQLITE_VEC_ELEMENT_TYPE_BIT = 224,
  SQLITE_VEC_ELEMENT_TYPE_INT8 = 225,
};
#ifdef SQLITE_VEC_ENABLE_AVX
#include <immintrin.h>
#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
#define PORTABLE_ALIGN64 __attribute__((aligned(64)))
2024-04-20 17:05:37 -07:00
static f32 l2_sqr_float_avx(const void *pVect1v, const void *pVect2v,
const void *qty_ptr) {
f32 *pVect1 = (f32 *)pVect1v;
f32 *pVect2 = (f32 *)pVect2v;
2024-04-20 13:38:58 -07:00
size_t qty = *((size_t *)qty_ptr);
2024-04-20 17:05:37 -07:00
f32 PORTABLE_ALIGN32 TmpRes[8];
2024-04-20 13:38:58 -07:00
size_t qty16 = qty >> 4;
2024-04-20 17:05:37 -07:00
const f32 *pEnd1 = pVect1 + (qty16 << 4);
2024-04-20 13:38:58 -07:00
__m256 diff, v1, v2;
__m256 sum = _mm256_set1_ps(0);
while (pVect1 < pEnd1) {
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
diff = _mm256_sub_ps(v1, v2);
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
v1 = _mm256_loadu_ps(pVect1);
pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2);
pVect2 += 8;
diff = _mm256_sub_ps(v1, v2);
sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
}
_mm256_store_ps(TmpRes, sum);
return sqrt(TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] +
TmpRes[5] + TmpRes[6] + TmpRes[7]);
}
#endif
#ifdef SQLITE_VEC_ENABLE_NEON
#include <arm_neon.h>
#define PORTABLE_ALIGN32 __attribute__((aligned(32)))
// thx https://github.com/nmslib/hnswlib/pull/299/files
2024-04-20 17:05:37 -07:00
static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
const void *qty_ptr) {
f32 *pVect1 = (f32 *)pVect1v;
f32 *pVect2 = (f32 *)pVect2v;
2024-04-20 13:38:58 -07:00
size_t qty = *((size_t *)qty_ptr);
size_t qty16 = qty >> 4;
2024-04-20 17:05:37 -07:00
const f32 *pEnd1 = pVect1 + (qty16 << 4);
2024-04-20 13:38:58 -07:00
float32x4_t diff, v1, v2;
float32x4_t sum0 = vdupq_n_f32(0);
float32x4_t sum1 = vdupq_n_f32(0);
float32x4_t sum2 = vdupq_n_f32(0);
float32x4_t sum3 = vdupq_n_f32(0);
while (pVect1 < pEnd1) {
v1 = vld1q_f32(pVect1);
pVect1 += 4;
v2 = vld1q_f32(pVect2);
pVect2 += 4;
diff = vsubq_f32(v1, v2);
sum0 = vfmaq_f32(sum0, diff, diff);
v1 = vld1q_f32(pVect1);
pVect1 += 4;
v2 = vld1q_f32(pVect2);
pVect2 += 4;
diff = vsubq_f32(v1, v2);
sum1 = vfmaq_f32(sum1, diff, diff);
v1 = vld1q_f32(pVect1);
pVect1 += 4;
v2 = vld1q_f32(pVect2);
pVect2 += 4;
diff = vsubq_f32(v1, v2);
sum2 = vfmaq_f32(sum2, diff, diff);
v1 = vld1q_f32(pVect1);
pVect1 += 4;
v2 = vld1q_f32(pVect2);
pVect2 += 4;
diff = vsubq_f32(v1, v2);
sum3 = vfmaq_f32(sum3, diff, diff);
}
2024-06-13 16:32:57 -07:00
f32 sum_scalar =
vaddvq_f32(vaddq_f32(vaddq_f32(sum0, sum1), vaddq_f32(sum2, sum3)));
const f32 *pEnd2 = pVect1 + (qty - (qty16 << 4));
while (pVect1 < pEnd2) {
f32 diff = *pVect1 - *pVect2;
sum_scalar += diff * diff;
pVect1++;
pVect2++;
}
return sqrt(sum_scalar);
2024-04-20 13:38:58 -07:00
}
static f32 cosine_float_neon(const void *pVect1v, const void *pVect2v,
const void *qty_ptr) {
f32 *pVect1 = (f32 *)pVect1v;
f32 *pVect2 = (f32 *)pVect2v;
size_t qty = *((size_t *)qty_ptr);
size_t qty16 = qty >> 4;
const f32 *pEnd1 = pVect1 + (qty16 << 4);
float32x4_t dot0 = vdupq_n_f32(0), dot1 = vdupq_n_f32(0);
float32x4_t dot2 = vdupq_n_f32(0), dot3 = vdupq_n_f32(0);
float32x4_t amag0 = vdupq_n_f32(0), amag1 = vdupq_n_f32(0);
float32x4_t amag2 = vdupq_n_f32(0), amag3 = vdupq_n_f32(0);
float32x4_t bmag0 = vdupq_n_f32(0), bmag1 = vdupq_n_f32(0);
float32x4_t bmag2 = vdupq_n_f32(0), bmag3 = vdupq_n_f32(0);
while (pVect1 < pEnd1) {
float32x4_t v1, v2;
v1 = vld1q_f32(pVect1); pVect1 += 4;
v2 = vld1q_f32(pVect2); pVect2 += 4;
dot0 = vfmaq_f32(dot0, v1, v2);
amag0 = vfmaq_f32(amag0, v1, v1);
bmag0 = vfmaq_f32(bmag0, v2, v2);
v1 = vld1q_f32(pVect1); pVect1 += 4;
v2 = vld1q_f32(pVect2); pVect2 += 4;
dot1 = vfmaq_f32(dot1, v1, v2);
amag1 = vfmaq_f32(amag1, v1, v1);
bmag1 = vfmaq_f32(bmag1, v2, v2);
v1 = vld1q_f32(pVect1); pVect1 += 4;
v2 = vld1q_f32(pVect2); pVect2 += 4;
dot2 = vfmaq_f32(dot2, v1, v2);
amag2 = vfmaq_f32(amag2, v1, v1);
bmag2 = vfmaq_f32(bmag2, v2, v2);
v1 = vld1q_f32(pVect1); pVect1 += 4;
v2 = vld1q_f32(pVect2); pVect2 += 4;
dot3 = vfmaq_f32(dot3, v1, v2);
amag3 = vfmaq_f32(amag3, v1, v1);
bmag3 = vfmaq_f32(bmag3, v2, v2);
}
f32 dot_s = vaddvq_f32(vaddq_f32(vaddq_f32(dot0, dot1), vaddq_f32(dot2, dot3)));
f32 amag_s = vaddvq_f32(vaddq_f32(vaddq_f32(amag0, amag1), vaddq_f32(amag2, amag3)));
f32 bmag_s = vaddvq_f32(vaddq_f32(vaddq_f32(bmag0, bmag1), vaddq_f32(bmag2, bmag3)));
const f32 *pEnd2 = pVect1 + (qty - (qty16 << 4));
while (pVect1 < pEnd2) {
dot_s += *pVect1 * *pVect2;
amag_s += *pVect1 * *pVect1;
bmag_s += *pVect2 * *pVect2;
pVect1++; pVect2++;
}
return 1.0f - (dot_s / (sqrtf(amag_s) * sqrtf(bmag_s)));
}
static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
const void *qty_ptr) {
i8 *pVect1 = (i8 *)pVect1v;
i8 *pVect2 = (i8 *)pVect2v;
size_t qty = *((size_t *)qty_ptr);
const i8 *pEnd1 = pVect1 + qty;
i32 sum_scalar = 0;
while (pVect1 < pEnd1 - 7) {
// loading 8 at a time
int8x8_t v1 = vld1_s8(pVect1);
int8x8_t v2 = vld1_s8(pVect2);
pVect1 += 8;
pVect2 += 8;
// widen i8 to i16 for subtraction
int16x8_t v1_wide = vmovl_s8(v1);
int16x8_t v2_wide = vmovl_s8(v2);
int16x8_t diff = vsubq_s16(v1_wide, v2_wide);
// widening multiply: i16*i16 -> i32 to avoid i16 overflow
// (diff can be up to 255, so diff*diff can be up to 65025 > INT16_MAX)
int32x4_t sq_lo = vmull_s16(vget_low_s16(diff), vget_low_s16(diff));
int32x4_t sq_hi = vmull_s16(vget_high_s16(diff), vget_high_s16(diff));
int32x4_t sum = vaddq_s32(sq_lo, sq_hi);
sum_scalar += vgetq_lane_s32(sum, 0) + vgetq_lane_s32(sum, 1) +
2024-06-13 16:32:57 -07:00
vgetq_lane_s32(sum, 2) + vgetq_lane_s32(sum, 3);
}
// handle leftovers
while (pVect1 < pEnd1) {
i16 diff = (i16)*pVect1 - (i16)*pVect2;
sum_scalar += diff * diff;
pVect1++;
pVect2++;
}
return sqrtf(sum_scalar);
}
static i32 l1_int8_neon(const void *pVect1v, const void *pVect2v,
const void *qty_ptr) {
i8 *pVect1 = (i8 *)pVect1v;
i8 *pVect2 = (i8 *)pVect2v;
size_t qty = *((size_t *)qty_ptr);
const int8_t *pEnd1 = pVect1 + qty;
int32x4_t acc1 = vdupq_n_s32(0);
int32x4_t acc2 = vdupq_n_s32(0);
int32x4_t acc3 = vdupq_n_s32(0);
int32x4_t acc4 = vdupq_n_s32(0);
while (pVect1 < pEnd1 - 63) {
int8x16_t v1 = vld1q_s8(pVect1);
int8x16_t v2 = vld1q_s8(pVect2);
int8x16_t diff1 = vabdq_s8(v1, v2);
acc1 = vaddq_s32(acc1, vpaddlq_u16(vpaddlq_u8(diff1)));
v1 = vld1q_s8(pVect1 + 16);
v2 = vld1q_s8(pVect2 + 16);
int8x16_t diff2 = vabdq_s8(v1, v2);
acc2 = vaddq_s32(acc2, vpaddlq_u16(vpaddlq_u8(diff2)));
v1 = vld1q_s8(pVect1 + 32);
v2 = vld1q_s8(pVect2 + 32);
int8x16_t diff3 = vabdq_s8(v1, v2);
acc3 = vaddq_s32(acc3, vpaddlq_u16(vpaddlq_u8(diff3)));
v1 = vld1q_s8(pVect1 + 48);
v2 = vld1q_s8(pVect2 + 48);
int8x16_t diff4 = vabdq_s8(v1, v2);
acc4 = vaddq_s32(acc4, vpaddlq_u16(vpaddlq_u8(diff4)));
pVect1 += 64;
pVect2 += 64;
}
while (pVect1 < pEnd1 - 15) {
int8x16_t v1 = vld1q_s8(pVect1);
int8x16_t v2 = vld1q_s8(pVect2);
int8x16_t diff = vabdq_s8(v1, v2);
acc1 = vaddq_s32(acc1, vpaddlq_u16(vpaddlq_u8(diff)));
pVect1 += 16;
pVect2 += 16;
}
int32x4_t acc = vaddq_s32(vaddq_s32(acc1, acc2), vaddq_s32(acc3, acc4));
int32_t sum = 0;
while (pVect1 < pEnd1) {
int32_t diff = abs((int32_t)*pVect1 - (int32_t)*pVect2);
sum += diff;
pVect1++;
pVect2++;
}
return vaddvq_s32(acc) + sum;
}
static double l1_f32_neon(const void *pVect1v, const void *pVect2v,
const void *qty_ptr) {
f32 *pVect1 = (f32 *)pVect1v;
f32 *pVect2 = (f32 *)pVect2v;
size_t qty = *((size_t *)qty_ptr);
const f32 *pEnd1 = pVect1 + qty;
float64x2_t acc = vdupq_n_f64(0);
while (pVect1 < pEnd1 - 3) {
float32x4_t v1 = vld1q_f32(pVect1);
float32x4_t v2 = vld1q_f32(pVect2);
pVect1 += 4;
pVect2 += 4;
// f32x4 -> f64x2 pad for overflow
float64x2_t low_diff = vabdq_f64(vcvt_f64_f32(vget_low_f32(v1)),
vcvt_f64_f32(vget_low_f32(v2)));
float64x2_t high_diff =
vabdq_f64(vcvt_high_f64_f32(v1), vcvt_high_f64_f32(v2));
acc = vaddq_f64(acc, vaddq_f64(low_diff, high_diff));
}
double sum = 0;
while (pVect1 < pEnd1) {
sum += fabs((double)*pVect1 - (double)*pVect2);
pVect1++;
pVect2++;
}
return vaddvq_f64(acc) + sum;
}
2024-04-20 13:38:58 -07:00
#endif
2024-04-20 17:05:37 -07:00
/**
 * Portable scalar L2 (Euclidean) distance between two float32 vectors.
 *
 * @param pVect1v first vector (f32 elements)
 * @param pVect2v second vector (f32 elements)
 * @param qty_ptr pointer to a size_t holding the number of elements
 * @return square root of the summed squared differences
 */
static f32 l2_sqr_float(const void *pVect1v, const void *pVect2v,
                        const void *qty_ptr) {
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;
  const size_t qty = *((const size_t *)qty_ptr);

  f32 acc = 0;
  for (size_t i = 0; i < qty; i++) {
    const f32 delta = lhs[i] - rhs[i];
    acc += delta * delta;
  }
  return sqrt(acc);
}
2024-04-20 17:05:37 -07:00
/**
 * Portable scalar L2 (Euclidean) distance between two int8 vectors.
 *
 * @param pA first vector (i8 elements)
 * @param pB second vector (i8 elements)
 * @param pD pointer to a size_t holding the number of elements
 * @return square root of the summed squared differences
 */
static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t n = *((const size_t *)pD);

  f32 acc = 0;
  for (size_t i = 0; i < n; i++) {
    // The subtraction happens in int, so it cannot wrap.
    const f32 delta = lhs[i] - rhs[i];
    acc += delta * delta;
  }
  return sqrt(acc);
}
2024-04-20 17:05:37 -07:00
/**
 * L2 distance between two float32 vectors, dispatching to the fastest
 * kernel compiled in: NEON for more than 16 elements, AVX when the length
 * is a multiple of 16, otherwise the scalar implementation.
 *
 * @param a first vector (f32 elements)
 * @param b second vector (f32 elements)
 * @param d pointer to a size_t holding the number of elements
 */
static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
#if defined(SQLITE_VEC_ENABLE_NEON) || defined(SQLITE_VEC_ENABLE_AVX)
  const size_t n = *(const size_t *)d;
#endif
#ifdef SQLITE_VEC_ENABLE_NEON
  if (n > 16) {
    return l2_sqr_float_neon(a, b, d);
  }
#endif
#ifdef SQLITE_VEC_ENABLE_AVX
  if ((n % 16) == 0) {
    return l2_sqr_float_avx(a, b, d);
  }
#endif
  return l2_sqr_float(a, b, d);
}
2024-04-20 17:05:37 -07:00
/**
 * L2 distance between two int8 vectors. Uses the NEON kernel when it is
 * compiled in and the vectors have at least 8 elements, otherwise the
 * scalar implementation.
 *
 * @param a first vector (i8 elements)
 * @param b second vector (i8 elements)
 * @param d pointer to a size_t holding the number of elements
 */
static f32 distance_l2_sqr_int8(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  const size_t n = *(const size_t *)d;
  if (n >= 8) {
    return l2_sqr_int8_neon(a, b, d);
  }
#endif
  return l2_sqr_int8(a, b, d);
}
/**
 * Portable scalar L1 (Manhattan) distance between two int8 vectors.
 *
 * @param pA first vector (i8 elements)
 * @param pB second vector (i8 elements)
 * @param pD pointer to a size_t holding the number of elements
 * @return sum of absolute element-wise differences
 */
static i32 l1_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t n = *((const size_t *)pD);

  i32 total = 0;
  for (size_t i = 0; i < n; i++) {
    total += abs(lhs[i] - rhs[i]);
  }
  return total;
}
/**
 * L1 distance between two int8 vectors. Uses the NEON kernel when it is
 * compiled in and the vectors have at least 16 elements, otherwise the
 * scalar implementation.
 *
 * @param a first vector (i8 elements)
 * @param b second vector (i8 elements)
 * @param d pointer to a size_t holding the number of elements
 */
static i32 distance_l1_int8(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  const size_t n = *(const size_t *)d;
  if (n >= 16) {
    return l1_int8_neon(a, b, d);
  }
#endif
  return l1_int8(a, b, d);
}
/**
 * Portable scalar L1 (Manhattan) distance between two float32 vectors,
 * accumulated in double precision.
 *
 * @param pA first vector (f32 elements)
 * @param pB second vector (f32 elements)
 * @param pD pointer to a size_t holding the number of elements
 * @return sum of absolute element-wise differences
 */
static double l1_f32(const void *pA, const void *pB, const void *pD) {
  const f32 *lhs = (const f32 *)pA;
  const f32 *rhs = (const f32 *)pB;
  const size_t n = *((const size_t *)pD);

  double total = 0;
  for (size_t i = 0; i < n; i++) {
    total += fabs((double)lhs[i] - (double)rhs[i]);
  }
  return total;
}
/**
 * L1 distance between two float32 vectors. Uses the NEON kernel when it is
 * compiled in and the vectors have at least 4 elements, otherwise the
 * scalar implementation.
 *
 * @param a first vector (f32 elements)
 * @param b second vector (f32 elements)
 * @param d pointer to a size_t holding the number of elements
 */
static double distance_l1_f32(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  const size_t n = *(const size_t *)d;
  if (n >= 4) {
    return l1_f32_neon(a, b, d);
  }
#endif
  return l1_f32(a, b, d);
}
2024-04-20 17:05:37 -07:00
/**
 * Cosine distance between two float32 vectors. Uses the NEON kernel when it
 * is compiled in and the vectors have more than 16 elements, otherwise a
 * scalar loop.
 *
 * @param pVect1v first vector (f32 elements)
 * @param pVect2v second vector (f32 elements)
 * @param qty_ptr pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 distance_cosine_float(const void *pVect1v, const void *pVect2v,
                                 const void *qty_ptr) {
  const size_t qty = *((const size_t *)qty_ptr);
#ifdef SQLITE_VEC_ENABLE_NEON
  if (qty > 16) {
    return cosine_float_neon(pVect1v, pVect2v, qty_ptr);
  }
#endif
  const f32 *lhs = (const f32 *)pVect1v;
  const f32 *rhs = (const f32 *)pVect2v;

  f32 dot = 0;
  f32 lhs_sq = 0;
  f32 rhs_sq = 0;
  for (size_t i = 0; i < qty; i++) {
    dot += lhs[i] * rhs[i];
    lhs_sq += lhs[i] * lhs[i];
    rhs_sq += rhs[i] * rhs[i];
  }
  return 1 - (dot / (sqrt(lhs_sq) * sqrt(rhs_sq)));
}
/**
 * Portable scalar cosine distance between two int8 vectors.
 *
 * @param pA first vector (i8 elements)
 * @param pB second vector (i8 elements)
 * @param pD pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 cosine_int8(const void *pA, const void *pB, const void *pD) {
  const i8 *lhs = (const i8 *)pA;
  const i8 *rhs = (const i8 *)pB;
  const size_t n = *((const size_t *)pD);

  f32 dot = 0;
  f32 lhs_sq = 0;
  f32 rhs_sq = 0;
  for (size_t i = 0; i < n; i++) {
    // Products are formed in int and then added to the float accumulators.
    dot += lhs[i] * rhs[i];
    lhs_sq += lhs[i] * lhs[i];
    rhs_sq += rhs[i] * rhs[i];
  }
  return 1 - (dot / (sqrt(lhs_sq) * sqrt(rhs_sq)));
}
#ifdef SQLITE_VEC_ENABLE_NEON
/**
 * Loads 16 int8 elements from `a` and `b` and adds their element-wise
 * products a*b, a*a and b*b into the given int32 accumulators.
 */
static inline void cosine_int8_neon_accum16(const i8 *a, const i8 *b,
                                            int32x4_t *dot, int32x4_t *aMag,
                                            int32x4_t *bMag) {
  const int8x16_t va = vld1q_s8(a);
  const int8x16_t vb = vld1q_s8(b);

  // Widen i8 -> i16 so the widening multiply-accumulate (i16*i16 -> i32)
  // can be used.
  const int16x8_t a_lo = vmovl_s8(vget_low_s8(va));
  const int16x8_t a_hi = vmovl_s8(vget_high_s8(va));
  const int16x8_t b_lo = vmovl_s8(vget_low_s8(vb));
  const int16x8_t b_hi = vmovl_s8(vget_high_s8(vb));

  *dot = vmlal_s16(*dot, vget_low_s16(a_lo), vget_low_s16(b_lo));
  *dot = vmlal_s16(*dot, vget_high_s16(a_lo), vget_high_s16(b_lo));
  *dot = vmlal_s16(*dot, vget_low_s16(a_hi), vget_low_s16(b_hi));
  *dot = vmlal_s16(*dot, vget_high_s16(a_hi), vget_high_s16(b_hi));

  *aMag = vmlal_s16(*aMag, vget_low_s16(a_lo), vget_low_s16(a_lo));
  *aMag = vmlal_s16(*aMag, vget_high_s16(a_lo), vget_high_s16(a_lo));
  *aMag = vmlal_s16(*aMag, vget_low_s16(a_hi), vget_low_s16(a_hi));
  *aMag = vmlal_s16(*aMag, vget_high_s16(a_hi), vget_high_s16(a_hi));

  *bMag = vmlal_s16(*bMag, vget_low_s16(b_lo), vget_low_s16(b_lo));
  *bMag = vmlal_s16(*bMag, vget_high_s16(b_lo), vget_high_s16(b_lo));
  *bMag = vmlal_s16(*bMag, vget_low_s16(b_hi), vget_low_s16(b_hi));
  *bMag = vmlal_s16(*bMag, vget_high_s16(b_hi), vget_high_s16(b_hi));
}

/**
 * NEON kernel for the cosine distance between two int8 vectors.
 *
 * The dot product and both squared magnitudes are accumulated as int32.
 * Elements are consumed 32 at a time (two independent accumulator sets),
 * then 16 at a time, then one at a time. Loop bounds are expressed as a
 * remaining element count so no out-of-range pointer is formed when the
 * input is shorter than one 32-element block.
 *
 * @param pA first vector (i8 elements)
 * @param pB second vector (i8 elements)
 * @param pD pointer to a size_t holding the number of elements
 * @return 1 - dot(a, b) / (|a| * |b|)
 */
static f32 cosine_int8_neon(const void *pA, const void *pB, const void *pD) {
  const i8 *a = (const i8 *)pA;
  const i8 *b = (const i8 *)pB;
  size_t left = *((const size_t *)pD);

  int32x4_t dot_acc1 = vdupq_n_s32(0);
  int32x4_t dot_acc2 = vdupq_n_s32(0);
  int32x4_t aMag_acc1 = vdupq_n_s32(0);
  int32x4_t aMag_acc2 = vdupq_n_s32(0);
  int32x4_t bMag_acc1 = vdupq_n_s32(0);
  int32x4_t bMag_acc2 = vdupq_n_s32(0);

  for (; left >= 32; left -= 32, a += 32, b += 32) {
    cosine_int8_neon_accum16(a, b, &dot_acc1, &aMag_acc1, &bMag_acc1);
    cosine_int8_neon_accum16(a + 16, b + 16, &dot_acc2, &aMag_acc2,
                             &bMag_acc2);
  }

  for (; left >= 16; left -= 16, a += 16, b += 16) {
    cosine_int8_neon_accum16(a, b, &dot_acc1, &aMag_acc1, &bMag_acc1);
  }

  i32 dot = vaddvq_s32(vaddq_s32(dot_acc1, dot_acc2));
  i32 aMag = vaddvq_s32(vaddq_s32(aMag_acc1, aMag_acc2));
  i32 bMag = vaddvq_s32(vaddq_s32(bMag_acc1, bMag_acc2));

  // Scalar pass over the final (< 16) elements.
  for (; left > 0; left--, a++, b++) {
    dot += (i32)*a * (i32)*b;
    aMag += (i32)*a * (i32)*a;
    bMag += (i32)*b * (i32)*b;
  }

  return 1.0f - ((f32)dot / (sqrtf((f32)aMag) * sqrtf((f32)bMag)));
}
#endif
/**
 * Cosine distance between two int8 vectors. Uses the NEON kernel when it is
 * compiled in and the vectors have at least 16 elements, otherwise the
 * scalar implementation.
 *
 * @param a first vector (i8 elements)
 * @param b second vector (i8 elements)
 * @param d pointer to a size_t holding the number of elements
 */
static f32 distance_cosine_int8(const void *a, const void *b, const void *d) {
#ifdef SQLITE_VEC_ENABLE_NEON
  const size_t n = *(const size_t *)d;
  if (n >= 16) {
    return cosine_int8_neon(a, b, d);
  }
#endif
  return cosine_int8(a, b, d);
}
2024-04-20 13:38:58 -07:00
// https://github.com/facebookresearch/faiss/blob/77e2e79cd0a680adc343b9840dd865da724c579e/faiss/utils/hamming_distance/common.h#L34
2024-04-20 17:02:19 -07:00
// Population count of every possible byte value: hamdist_table[x] is the
// number of set bits in x. Indexed with the XOR of two bytes to get their
// Hamming distance. Read-only, hence const.
static const u8 hamdist_table[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
#ifdef SQLITE_VEC_ENABLE_NEON
static f32 distance_hamming_neon(const u8 *a, const u8 *b, size_t n_bytes) {
const u8 *pEnd = a + n_bytes;
uint32x4_t acc1 = vdupq_n_u32(0);
uint32x4_t acc2 = vdupq_n_u32(0);
uint32x4_t acc3 = vdupq_n_u32(0);
uint32x4_t acc4 = vdupq_n_u32(0);
while (a <= pEnd - 64) {
uint8x16_t v1 = vld1q_u8(a);
uint8x16_t v2 = vld1q_u8(b);
acc1 = vaddq_u32(acc1, vpaddlq_u16(vpaddlq_u8(vcntq_u8(veorq_u8(v1, v2)))));
v1 = vld1q_u8(a + 16);
v2 = vld1q_u8(b + 16);
acc2 = vaddq_u32(acc2, vpaddlq_u16(vpaddlq_u8(vcntq_u8(veorq_u8(v1, v2)))));
v1 = vld1q_u8(a + 32);
v2 = vld1q_u8(b + 32);
acc3 = vaddq_u32(acc3, vpaddlq_u16(vpaddlq_u8(vcntq_u8(veorq_u8(v1, v2)))));
v1 = vld1q_u8(a + 48);
v2 = vld1q_u8(b + 48);
acc4 = vaddq_u32(acc4, vpaddlq_u16(vpaddlq_u8(vcntq_u8(veorq_u8(v1, v2)))));
a += 64;
b += 64;
}
while (a <= pEnd - 16) {
uint8x16_t v1 = vld1q_u8(a);
uint8x16_t v2 = vld1q_u8(b);
acc1 = vaddq_u32(acc1, vpaddlq_u16(vpaddlq_u8(vcntq_u8(veorq_u8(v1, v2)))));
a += 16;
b += 16;
}
acc1 = vaddq_u32(acc1, acc2);
acc3 = vaddq_u32(acc3, acc4);
acc1 = vaddq_u32(acc1, acc3);
u32 sum = vaddvq_u32(acc1);
while (a < pEnd) {
sum += hamdist_table[*a ^ *b];
a++;
b++;
}
return (f32)sum;
}
#endif
#ifdef SQLITE_VEC_ENABLE_AVX
/**
* AVX2 Hamming distance using VPSHUFB-based popcount.
* Processes 32 bytes (256 bits) per iteration.
*/
static f32 distance_hamming_avx2(const u8 *a, const u8 *b, size_t n_bytes) {
const u8 *pEnd = a + n_bytes;
// VPSHUFB lookup table: popcount of low nibble
const __m256i lookup = _mm256_setr_epi8(
0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4);
const __m256i low_mask = _mm256_set1_epi8(0x0f);
__m256i acc = _mm256_setzero_si256();
while (a <= pEnd - 32) {
__m256i va = _mm256_loadu_si256((const __m256i *)a);
__m256i vb = _mm256_loadu_si256((const __m256i *)b);
__m256i xored = _mm256_xor_si256(va, vb);
// VPSHUFB popcount: split into nibbles, lookup each
__m256i lo = _mm256_and_si256(xored, low_mask);
__m256i hi = _mm256_and_si256(_mm256_srli_epi16(xored, 4), low_mask);
__m256i popcnt = _mm256_add_epi8(_mm256_shuffle_epi8(lookup, lo),
_mm256_shuffle_epi8(lookup, hi));
// Horizontal sum: u8 -> u64 via sad against zero
acc = _mm256_add_epi64(acc, _mm256_sad_epu8(popcnt, _mm256_setzero_si256()));
a += 32;
b += 32;
}
// Horizontal sum of 4 x u64 lanes
u64 tmp[4];
_mm256_storeu_si256((__m256i *)tmp, acc);
u32 sum = (u32)(tmp[0] + tmp[1] + tmp[2] + tmp[3]);
// Scalar tail
while (a < pEnd) {
u8 x = *a ^ *b;
x = x - ((x >> 1) & 0x55);
x = (x & 0x33) + ((x >> 2) & 0x33);
sum += (x + (x >> 4)) & 0x0F;
a++;
b++;
}
return (f32)sum;
}
#endif
2024-04-20 17:05:37 -07:00
/**
 * Byte-at-a-time Hamming distance using the hamdist_table lookup.
 *
 * @param a first buffer
 * @param b second buffer
 * @param n number of bytes in each buffer
 * @return number of differing bits
 */
static f32 distance_hamming_u8(u8 *a, u8 *b, size_t n) {
  int differing_bits = 0;
  for (size_t idx = 0; idx < n; idx++) {
    const u8 diff = a[idx] ^ b[idx];
    differing_bits += hamdist_table[diff];
  }
  return (f32)differing_bits;
}
2024-06-09 14:53:12 +08:00
// MSVC does not provide GCC's __builtin_popcountl, so supply a stand-in.
#ifdef _MSC_VER
#if !defined(__clang__) && (defined(_M_ARM) || defined(_M_ARM64))
// Software fallback for MSVC targeting ARM / ARM64.
// Adapted from
// https://github.com/ngtcp2/ngtcp2/blob/b64f1e77b5e0d880b93d31f474147fae4a1d17cc/lib/ngtcp2_ringbuf.c,
// line 34-43
//
// The parameter is 64 bits wide because distance_hamming_u64() passes a
// 64-bit XOR; a 32-bit parameter would silently drop the upper half.
static unsigned int __builtin_popcountl(unsigned long long x) {
  unsigned int c = 0;
  // Clear the lowest set bit until none remain.
  for (; x; ++c) {
    x &= x - 1;
  }
  return c;
}
#else
#include <intrin.h>
#define __builtin_popcountl __popcnt64
#endif
#endif
2024-06-09 14:53:12 +08:00
static f32 distance_hamming_u64(const u8 *a, const u8 *b, size_t n) {
2024-04-20 13:38:58 -07:00
int same = 0;
for (unsigned long i = 0; i < n; i++) {
u64 va, vb;
memcpy(&va, a + i * sizeof(u64), sizeof(u64));
memcpy(&vb, b + i * sizeof(u64), sizeof(u64));
same += __builtin_popcountl(va ^ vb);
2024-04-20 13:38:58 -07:00
}
2024-04-20 17:05:37 -07:00
return (f32)same;
2024-04-20 13:38:58 -07:00
}
2024-05-26 20:54:37 -07:00
/**
* @brief Calculate the hamming distance between two bitvectors.
*
* @param a - first bitvector, MUST have d dimensions
* @param b - second bitvector, MUST have d dimensions
* @param d - pointer to size_t, MUST be divisible by CHAR_BIT
* @return f32
*/
2024-04-20 17:05:37 -07:00
static f32 distance_hamming(const void *a, const void *b, const void *d) {
2024-04-20 13:38:58 -07:00
size_t dimensions = *((size_t *)d);
size_t n_bytes = dimensions / CHAR_BIT;
#ifdef SQLITE_VEC_ENABLE_NEON
if (dimensions >= 128) {
return distance_hamming_neon((const u8 *)a, (const u8 *)b, n_bytes);
}
#endif
#ifdef SQLITE_VEC_ENABLE_AVX
if (n_bytes >= 32) {
return distance_hamming_avx2((const u8 *)a, (const u8 *)b, n_bytes);
}
#endif
2024-04-20 13:38:58 -07:00
if ((dimensions % 64) == 0) {
return distance_hamming_u64((const u8 *)a, (const u8 *)b, n_bytes / sizeof(u64));
2024-04-20 13:38:58 -07:00
}
return distance_hamming_u8((u8 *)a, (u8 *)b, n_bytes);
2024-04-20 13:38:58 -07:00
}
// Non-static wrappers around the file-local distance functions, compiled
// only when SQLITE_VEC_TEST is defined. Each one takes the dimension count
// by value and forwards its address to the wrapped function.
#ifdef SQLITE_VEC_TEST
// Wraps distance_l2_sqr_float(); `dims` is the number of f32 elements.
f32 _test_distance_l2_sqr_float(const f32 *a, const f32 *b, size_t dims) {
return distance_l2_sqr_float(a, b, &dims);
}
// Wraps distance_cosine_float(); `dims` is the number of f32 elements.
f32 _test_distance_cosine_float(const f32 *a, const f32 *b, size_t dims) {
return distance_cosine_float(a, b, &dims);
}
// Wraps distance_hamming(); `dims` is the number of bits (distance_hamming
// divides it by CHAR_BIT to get the byte count).
f32 _test_distance_hamming(const u8 *a, const u8 *b, size_t dims) {
return distance_hamming(a, b, &dims);
}
#endif
2024-04-20 13:38:58 -07:00
// from SQLite source:
// https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153
//
// 256-entry lookup table indexed by byte value: 1 for the bytes treated as
// JSON whitespace (tab, line feed, carriage return, space), 0 for
// everything else.
static const char vecJsonIsSpaceX[256] = {
    ['\t'] = 1,
    ['\n'] = 1,
    ['\r'] = 1,
    [' '] = 1,
};
// Evaluates to 1 when byte `x` is JSON whitespace, 0 otherwise. The argument
// is parenthesized so the cast applies to the whole expression, and the cast
// to unsigned char keeps a negative `char` from indexing out of bounds.
#define vecJsonIsspace(x) (vecJsonIsSpaceX[(unsigned char)(x)])
2024-04-20 13:38:58 -07:00
// Callback type used to release a vector buffer once the caller is done
// with it.
typedef void (*vector_cleanup)(void *p);
// Cleanup callback that does nothing. bitvec_from_value() hands it out when
// the returned vector points straight at the sqlite3_value's blob, so there
// is no separate allocation to free.
void vector_cleanup_noop(void *_) { UNUSED_PARAMETER(_); }
// NOTE(review): presumably the subtype SQLite's JSON functions attach to
// their results (74 is ASCII 'J') -- confirm against the SQLite docs.
#define JSON_SUBTYPE 74
2024-06-24 23:26:11 -07:00
/**
 * Replace a virtual table's error message with a printf-style formatted one.
 *
 * The previous pVTab->zErrMsg is released with sqlite3_free() before the new
 * message is built with sqlite3_vmprintf().
 *
 * @param pVTab virtual table whose zErrMsg is replaced
 * @param zFormat sqlite3_mprintf-style format string
 * @param ... arguments consumed by zFormat
 */
void vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) {
  va_list ap;
  char *message;

  sqlite3_free(pVTab->zErrMsg);

  va_start(ap, zFormat);
  message = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);

  pVTab->zErrMsg = message;
}
2024-04-20 13:38:58 -07:00
// Growable array of fixed-size elements stored contiguously in `z`.
// Managed by array_init(), array_append() and array_cleanup().
struct Array {
// Size in bytes of one element.
size_t element_size;
// Number of elements currently stored.
size_t length;
// Number of elements the buffer `z` has room for.
size_t capacity;
// Element storage, allocated with the sqlite3 allocator.
void *z;
};
2024-05-12 00:16:10 -07:00
/**
* @brief Initial an array with the given element size and capacity.
*
* @param array
* @param element_size
* @param init_capacity
2024-06-13 16:32:57 -07:00
* @return SQLITE_OK on success, error code on failure. Only error is
* SQLITE_NOMEM
2024-05-12 00:16:10 -07:00
*/
2024-04-20 13:38:58 -07:00
int array_init(struct Array *array, size_t element_size, size_t init_capacity) {
int sz = element_size * init_capacity;
void *z = sqlite3_malloc(sz);
2024-04-20 13:38:58 -07:00
if (!z) {
return SQLITE_NOMEM;
}
memset(z, 0, sz);
2024-04-20 13:38:58 -07:00
array->element_size = element_size;
array->length = 0;
array->capacity = init_capacity;
array->z = z;
return SQLITE_OK;
}
/**
 * Append a copy of one element to the array, growing the buffer if full.
 *
 * When the array is at capacity the buffer is reallocated to hold
 * `capacity * 2 + 100` elements. On allocation failure the array is left
 * unchanged.
 *
 * @param array array to append to
 * @param element pointer to `array->element_size` bytes to copy in
 * @return SQLITE_OK on success, SQLITE_NOMEM if the buffer could not grow
 */
int array_append(struct Array *array, const void *element) {
  if (array->length == array->capacity) {
    const size_t grown_capacity = array->capacity * 2 + 100;
    void *resized =
        sqlite3_realloc64(array->z, array->element_size * grown_capacity);
    if (!resized) {
      return SQLITE_NOMEM;
    }
    array->z = resized;
    array->capacity = grown_capacity;
  }

  unsigned char *slot =
      (unsigned char *)array->z + array->length * array->element_size;
  memcpy(slot, element, array->element_size);
  array->length++;
  return SQLITE_OK;
}
/**
 * Release an array's buffer and reset all of its fields to zero/NULL.
 * Does nothing when `array` is NULL.
 */
void array_cleanup(struct Array *array) {
  if (array == NULL) {
    return;
  }
  sqlite3_free(array->z);
  array->z = NULL;
  array->capacity = 0;
  array->length = 0;
  array->element_size = 0;
}
/**
 * Human-readable name of a vector element type.
 *
 * @param subtype one of the SQLITE_VEC_ELEMENT_TYPE_* values
 * @return "float32", "int8" or "bit"; "" for any other value. The returned
 * string is a literal and must not be modified or freed.
 */
char *vector_subtype_name(int subtype) {
  if (subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    return "float32";
  }
  if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    return "int8";
  }
  if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    return "bit";
  }
  return "";
}
/**
 * Human-readable name of a SQLite fundamental datatype code.
 *
 * @param type one of SQLITE_INTEGER, SQLITE_BLOB, SQLITE_TEXT, SQLITE_FLOAT
 * or SQLITE_NULL
 * @return the matching upper-case name; "" for any other value. The
 * returned string is a literal and must not be modified or freed.
 */
char *type_name(int type) {
  if (type == SQLITE_INTEGER) {
    return "INTEGER";
  }
  if (type == SQLITE_BLOB) {
    return "BLOB";
  }
  if (type == SQLITE_TEXT) {
    return "TEXT";
  }
  if (type == SQLITE_FLOAT) {
    return "FLOAT";
  }
  if (type == SQLITE_NULL) {
    return "NULL";
  }
  return "";
}
// Callback type used to release a float32 vector returned by
// fvec_from_value().
typedef void (*fvec_cleanup)(void *vector);

// fvec_cleanup implementation that does nothing.
void fvec_cleanup_noop(void *_) {
  UNUSED_PARAMETER(_);
}
2024-04-20 13:38:58 -07:00
2024-04-20 17:05:37 -07:00
/**
 * Parse a float32 vector out of a SQLite value.
 *
 * Two encodings are accepted:
 *  - BLOB: raw f32 values; the length must be a non-zero multiple of
 *    sizeof(f32). The bytes are copied into a fresh allocation.
 *  - TEXT: a JSON-style array of numbers such as "[1, 2.5, -3]".
 * NaN and infinite elements are rejected in both encodings.
 *
 * @param value SQLite value to read
 * @param vector out: newly allocated array of f32 elements
 * @param dimensions out: number of elements in *vector
 * @param cleanup out: function the caller uses to release *vector
 *                (sqlite3_free on success)
 * @param pzErr out: on failure, a message allocated with sqlite3_mprintf()
 *              -- presumably freed by the caller with sqlite3_free()
 * @return SQLITE_OK on success; SQLITE_NOMEM on allocation failure;
 *         SQLITE_ERROR for malformed, empty or unsupported input. The out
 *         parameters other than pzErr are only written on success.
 */
static int fvec_from_value(sqlite3_value *value, f32 **vector,
                           size_t *dimensions, fvec_cleanup *cleanup,
                           char **pzErr) {
  int value_type = sqlite3_value_type(value);

  if (value_type == SQLITE_BLOB) {
    const void *blob = sqlite3_value_blob(value);
    int bytes = sqlite3_value_bytes(value);
    if (bytes == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }
    if ((bytes % sizeof(f32)) != 0) {
      // %d expects an int, so sizeof (a size_t) must be cast explicitly.
      *pzErr = sqlite3_mprintf("invalid float32 vector BLOB length. Must be "
                               "divisible by %d, found %d",
                               (int)sizeof(f32), bytes);
      return SQLITE_ERROR;
    }
    // Copy the blob so the result is a separate allocation the caller owns,
    // independent of `value`.
    f32 *buf = sqlite3_malloc(bytes);
    if (!buf) {
      *pzErr = sqlite3_mprintf("out of memory");
      return SQLITE_NOMEM;
    }
    memcpy(buf, blob, bytes);
    size_t n = bytes / sizeof(f32);
    for (size_t i = 0; i < n; i++) {
      if (isnan(buf[i]) || isinf(buf[i])) {
        *pzErr = sqlite3_mprintf(
            "invalid float32 vector: element %d is %s",
            (int)i, isnan(buf[i]) ? "NaN" : "Inf");
        sqlite3_free(buf);
        return SQLITE_ERROR;
      }
    }
    *vector = buf;
    *dimensions = n;
    *cleanup = sqlite3_free;
    return SQLITE_OK;
  }

  if (value_type == SQLITE_TEXT) {
    const char *source = (const char *)sqlite3_value_text(value);
    int source_len = sqlite3_value_bytes(value);
    if (source_len == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }

    int i = 0;
    struct Array x;
    int rc = array_init(&x, sizeof(f32), ceil(source_len / 2.0));
    if (rc != SQLITE_OK) {
      // Every error path must leave a message in *pzErr.
      *pzErr = sqlite3_mprintf("out of memory");
      return rc;
    }

    // advance leading whitespace to first '['
    while (i < source_len && vecJsonIsspace(source[i])) {
      i++;
    }
    if (i >= source_len || source[i] != '[') {
      *pzErr = sqlite3_mprintf(
          "JSON array parsing error: Input does not start with '['");
      array_cleanup(&x);
      return SQLITE_ERROR;
    }

    int offset = i + 1;
    while (offset < source_len) {
      const char *ptr = &source[offset];
      char *endptr;

      errno = 0;
      double result = strtod(ptr, &endptr);
      if ((errno != 0 && result == 0) // some interval error?
          || (errno == ERANGE &&
              (result == HUGE_VAL || result == -HUGE_VAL)) // too big / smalls
      ) {
        array_cleanup(&x);
        *pzErr = sqlite3_mprintf("JSON parsing error");
        return SQLITE_ERROR;
      }

      // Nothing was consumed: the only thing allowed here is the closing
      // bracket.
      if (endptr == ptr) {
        if (*ptr != ']') {
          array_cleanup(&x);
          *pzErr = sqlite3_mprintf("JSON parsing error");
          return SQLITE_ERROR;
        }
        goto done;
      }

      f32 res = (f32)result;
      if (isnan(res) || isinf(res)) {
        *pzErr = sqlite3_mprintf(
            "invalid float32 vector: element %d is %s",
            (int)x.length, isnan(res) ? "NaN" : "Inf");
        array_cleanup(&x);
        return SQLITE_ERROR;
      }

      rc = array_append(&x, (const void *)&res);
      if (rc != SQLITE_OK) {
        // Do not silently drop an element if the array could not grow.
        array_cleanup(&x);
        *pzErr = sqlite3_mprintf("out of memory");
        return rc;
      }

      offset += (endptr - ptr);

      // Skip whitespace and commas up to the next element or the closing
      // bracket.
      while (offset < source_len) {
        if (vecJsonIsspace(source[offset])) {
          offset++;
          continue;
        }
        if (source[offset] == ',') {
          offset++;
          continue;
        }
        if (source[offset] == ']') {
          goto done;
        }
        break;
      }
    }

  done:
    if (x.length > 0) {
      // Ownership of the array's buffer passes to the caller.
      *vector = (f32 *)x.z;
      *dimensions = x.length;
      *cleanup = sqlite3_free;
      return SQLITE_OK;
    }
    array_cleanup(&x);
    *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
    return SQLITE_ERROR;
  }

  *pzErr = sqlite3_mprintf(
      "Input must have type BLOB (compact format) or TEXT (JSON), found %s",
      type_name(value_type));
  return SQLITE_ERROR;
}
2024-04-20 17:02:19 -07:00
/**
 * @brief Read a bit vector from a sqlite3_value.
 *
 * Only BLOB values are accepted. The pointer returned by sqlite3_value_blob()
 * is handed back directly (no copy is made), which is why the cleanup routine
 * is a no-op.
 *
 * @param value: the sqlite3_value to read from.
 * @param vector: Output pointer to the packed bits (borrowed from `value`).
 * @param dimensions: Output number of bits: blob size in bytes * CHAR_BIT.
 * @param cleanup: Output cleanup routine, set to vector_cleanup_noop.
 * @param pzErr: Output error message (sqlite3_mprintf) when not SQLITE_OK.
 * @return int SQLITE_OK on success, SQLITE_ERROR otherwise.
 */
static int bitvec_from_value(sqlite3_value *value, u8 **vector,
                             size_t *dimensions, vector_cleanup *cleanup,
                             char **pzErr) {
  int value_type = sqlite3_value_type(value);
  if (value_type == SQLITE_BLOB) {
    const void *blob = sqlite3_value_blob(value);
    int bytes = sqlite3_value_bytes(value);
    if (bytes == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }
    *vector = (u8 *)blob;
    // Widen before multiplying: `bytes * CHAR_BIT` evaluated in int would
    // overflow for blobs larger than INT_MAX / CHAR_BIT bytes.
    *dimensions = (size_t)bytes * CHAR_BIT;
    *cleanup = vector_cleanup_noop;
    return SQLITE_OK;
  }
  *pzErr = sqlite3_mprintf("Unknown type for bitvector.");
  return SQLITE_ERROR;
}
2024-04-20 17:02:19 -07:00
/**
 * @brief Read an int8 vector from a sqlite3_value.
 *
 * BLOB input is used in place (one byte per element, no copy). TEXT input is
 * parsed as a bracketed list of base-10 integers, e.g. "[1, -2, 3]"; each
 * element must fit in [INT8_MIN, INT8_MAX]. The parsed elements are collected
 * in a heap buffer that the caller releases through `*cleanup`.
 *
 * @param value: the sqlite3_value to read from.
 * @param vector: Output pointer to the int8 elements.
 * @param dimensions: Output number of elements.
 * @param cleanup: Output cleanup routine for `*vector` (no-op for BLOB input,
 * sqlite3_free for TEXT input).
 * @param pzErr: Output error message (sqlite3_mprintf) on the SQLITE_ERROR
 * paths. It is not set when array_init() fails.
 * @return int SQLITE_OK on success, SQLITE_ERROR on malformed input, or the
 * array_init() error code.
 */
static int int8_vec_from_value(sqlite3_value *value, i8 **vector,
                               size_t *dimensions, vector_cleanup *cleanup,
                               char **pzErr) {
  switch (sqlite3_value_type(value)) {
  case SQLITE_BLOB: {
    const void *raw = sqlite3_value_blob(value);
    int nBytes = sqlite3_value_bytes(value);
    if (nBytes == 0) {
      *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
      return SQLITE_ERROR;
    }
    *vector = (i8 *)raw;
    *dimensions = nBytes;
    *cleanup = vector_cleanup_noop;
    return SQLITE_OK;
  }
  case SQLITE_TEXT:
    break;
  default:
    *pzErr = sqlite3_mprintf("Unknown type for int8 vector.");
    return SQLITE_ERROR;
  }

  const char *text = (const char *)sqlite3_value_text(value);
  int text_len = sqlite3_value_bytes(value);
  if (text_len == 0) {
    *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
    return SQLITE_ERROR;
  }

  struct Array elements;
  int rc = array_init(&elements, sizeof(i8), ceil(text_len / 2.0));
  if (rc != SQLITE_OK) {
    return rc;
  }

  // Skip leading whitespace; the first other character must be '['. When the
  // text is whitespace only, this inspects the byte at text[text_len].
  int open = 0;
  while (open < text_len && vecJsonIsspace(text[open])) {
    open++;
  }
  if (text[open] != '[') {
    *pzErr = sqlite3_mprintf(
        "JSON array parsing error: Input does not start with '['");
    array_cleanup(&elements);
    return SQLITE_ERROR;
  }

  for (int cursor = open + 1; cursor < text_len;) {
    char *token = (char *)&text[cursor];
    char *token_end;
    errno = 0;
    long parsed = strtol(token, &token_end, 10);

    int conversion_failed =
        (errno != 0 && parsed == 0) ||
        (errno == ERANGE && (parsed == LONG_MAX || parsed == LONG_MIN));
    if (conversion_failed) {
      sqlite3_free(elements.z);
      *pzErr = sqlite3_mprintf("JSON parsing error");
      return SQLITE_ERROR;
    }

    if (token_end == token) {
      // Nothing numeric was consumed: only a closing bracket is acceptable.
      if (*token != ']') {
        sqlite3_free(elements.z);
        *pzErr = sqlite3_mprintf("JSON parsing error");
        return SQLITE_ERROR;
      }
      break;
    }

    if (parsed < INT8_MIN || parsed > INT8_MAX) {
      sqlite3_free(elements.z);
      *pzErr =
          sqlite3_mprintf("JSON parsing error: value out of range for int8");
      return SQLITE_ERROR;
    }

    i8 element = (i8)parsed;
    array_append(&elements, (const void *)&element);
    cursor += (token_end - token);

    // Step over any run of whitespace and commas after the number.
    while (cursor < text_len &&
           (vecJsonIsspace(text[cursor]) || text[cursor] == ',')) {
      cursor++;
    }
    if (cursor < text_len && text[cursor] == ']') {
      break;
    }
  }

  if (elements.length > 0) {
    *vector = (i8 *)elements.z;
    *dimensions = elements.length;
    *cleanup = (vector_cleanup)sqlite3_free;
    return SQLITE_OK;
  }
  sqlite3_free(elements.z);
  *pzErr = sqlite3_mprintf("zero-length vectors are not supported.");
  return SQLITE_ERROR;
}
/**
 * @brief Extract a vector from a sqlite3_value. Can be a float32, int8, or bit
 * vector; the parser is chosen from the value's subtype.
 *
 * A value with no subtype, the float32 subtype, or the JSON subtype is read
 * with fvec_from_value(). The bit and int8 subtypes are read with
 * bitvec_from_value() and int8_vec_from_value() respectively.
 *
 * @param value: the sqlite3_value to read from.
 * @param vector: Output pointer to vector data.
 * @param dimensions: Output number of dimensions
 * @param element_type: Output vector element type, written only on success
 * @param cleanup: Output routine the caller uses to release `*vector`
 * @param pzErrorMessage: Output error message (sqlite3_mprintf) on failure
 * @return int SQLITE_OK on success, error code otherwise
 */
int vector_from_value(sqlite3_value *value, void **vector, size_t *dimensions,
                      enum VectorElementType *element_type,
                      vector_cleanup *cleanup, char **pzErrorMessage) {
  int subtype = sqlite3_value_subtype(value);
  enum VectorElementType detected;
  int rc;

  if (subtype == 0 || subtype == SQLITE_VEC_ELEMENT_TYPE_FLOAT32 ||
      subtype == JSON_SUBTYPE) {
    detected = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
    rc = fvec_from_value(value, (f32 **)vector, dimensions,
                         (fvec_cleanup *)cleanup, pzErrorMessage);
  } else if (subtype == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    detected = SQLITE_VEC_ELEMENT_TYPE_BIT;
    rc = bitvec_from_value(value, (u8 **)vector, dimensions, cleanup,
                           pzErrorMessage);
  } else if (subtype == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    detected = SQLITE_VEC_ELEMENT_TYPE_INT8;
    rc = int8_vec_from_value(value, (i8 **)vector, dimensions, cleanup,
                             pzErrorMessage);
  } else {
    *pzErrorMessage = sqlite3_mprintf("Unknown subtype: %d", subtype);
    return SQLITE_ERROR;
  }

  if (rc == SQLITE_OK) {
    *element_type = detected;
  }
  return rc;
}
/**
 * @brief Read two vectors and verify they have the same element type and the
 * same number of dimensions.
 *
 * On any failure, every vector that was successfully read is released with
 * its cleanup routine before returning, and `*outError` receives a message
 * allocated with sqlite3_mprintf(). On success the caller owns both vectors
 * and must release them with `*outACleanup` / `*outBCleanup`.
 *
 * @param aValue: first sqlite3_value to read.
 * @param bValue: second sqlite3_value to read.
 * @param a: Output pointer to the first vector's data.
 * @param b: Output pointer to the second vector's data.
 * @param element_type: Output shared element type (success only).
 * @param dimensions: Output shared dimension count (success only).
 * @param outACleanup: Output cleanup routine for `*a` (success only).
 * @param outBCleanup: Output cleanup routine for `*b` (success only).
 * @param outError: Output error message on failure.
 * @return int SQLITE_OK on success, SQLITE_ERROR otherwise.
 */
int ensure_vector_match(sqlite3_value *aValue, sqlite3_value *bValue, void **a,
                        void **b, enum VectorElementType *element_type,
                        size_t *dimensions, vector_cleanup *outACleanup,
                        vector_cleanup *outBCleanup, char **outError) {
  int rc;
  enum VectorElementType aType, bType;
  size_t aDims, bDims;
  char *error = NULL;
  vector_cleanup aCleanup, bCleanup;

  rc = vector_from_value(aValue, a, &aDims, &aType, &aCleanup, &error);
  if (rc != SQLITE_OK) {
    *outError = sqlite3_mprintf("Error reading 1st vector: %s", error);
    sqlite3_free(error);
    return SQLITE_ERROR;
  }

  rc = vector_from_value(bValue, b, &bDims, &bType, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    *outError = sqlite3_mprintf("Error reading 2nd vector: %s", error);
    sqlite3_free(error);
    aCleanup(*a);
    return SQLITE_ERROR;
  }

  if (aType != bType) {
    *outError =
        sqlite3_mprintf("Vector type mistmatch. First vector has type %s, "
                        "while the second has type %s.",
                        vector_subtype_name(aType), vector_subtype_name(bType));
    aCleanup(*a);
    bCleanup(*b);
    return SQLITE_ERROR;
  }

  if (aDims != bDims) {
    // size_t is not `long` on every platform (e.g. LLP64), so the counts are
    // passed as 64-bit integers with the matching %lld conversion.
    *outError = sqlite3_mprintf(
        "Vector dimension mistmatch. First vector has %lld dimensions, "
        "while the second has %lld dimensions.",
        (sqlite3_int64)aDims, (sqlite3_int64)bDims);
    aCleanup(*a);
    bCleanup(*b);
    return SQLITE_ERROR;
  }

  *element_type = aType;
  *dimensions = aDims;
  *outACleanup = aCleanup;
  *outBCleanup = bCleanup;
  return SQLITE_OK;
}
2024-04-20 17:02:19 -07:00
int _cmp(const void *a, const void *b) { return (*(i64 *)a - *(i64 *)b); }
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
#pragma region scalar functions
2024-04-20 13:38:58 -07:00
/**
 * SQL scalar function body: reads argv[0] with fvec_from_value() and returns
 * it as a BLOB tagged with the float32 subtype. The parsed buffer is handed
 * to SQLite together with its cleanup routine, so it is not released here.
 */
static void vec_f32(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  f32 *parsed = NULL;
  size_t nDims;
  fvec_cleanup release;
  char *zErr;

  if (fvec_from_value(argv[0], &parsed, &nDims, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  sqlite3_result_blob(context, parsed, nDims * sizeof(f32),
                      (void (*)(void *))release);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
}
2024-06-24 23:26:11 -07:00
2024-04-20 13:38:58 -07:00
/**
 * SQL scalar function body: reads argv[0] with bitvec_from_value() and
 * returns a copy of its bytes (SQLITE_TRANSIENT) as a BLOB tagged with the
 * bit subtype.
 */
static void vec_bit(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  u8 *bits;
  size_t nBits;
  vector_cleanup release;
  char *zErr;

  if (bitvec_from_value(argv[0], &bits, &nBits, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  // Dimensions are counted in bits; the blob length is in bytes.
  sqlite3_result_blob(context, bits, nBits / CHAR_BIT, SQLITE_TRANSIENT);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
  release(bits);
}
/**
 * SQL scalar function body: reads argv[0] with int8_vec_from_value() and
 * returns a copy of its elements (SQLITE_TRANSIENT) as a BLOB tagged with the
 * int8 subtype.
 */
static void vec_int8(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  i8 *elements;
  size_t nDims;
  vector_cleanup release;
  char *zErr;

  if (int8_vec_from_value(argv[0], &elements, &nDims, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  sqlite3_result_blob(context, elements, nDims, SQLITE_TRANSIENT);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
  release(elements);
}
/**
 * SQL scalar function body: returns the number of dimensions of the vector
 * in argv[0] (any element type accepted by vector_from_value()) as an integer.
 */
static void vec_length(sqlite3_context *context, int argc,
                       sqlite3_value **argv) {
  assert(argc == 1);
  void *data;
  size_t nDims;
  vector_cleanup release;
  char *zErr;
  enum VectorElementType kind;

  if (vector_from_value(argv[0], &data, &nDims, &kind, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  sqlite3_result_int64(context, nDims);
  release(data);
}
/**
 * SQL scalar function body: cosine distance between the vectors in argv[0]
 * and argv[1]. Both must share element type and dimensions
 * (ensure_vector_match()). float32 and int8 are supported; bit vectors
 * produce an error result.
 */
static void vec_distance_cosine(sqlite3_context *context, int argc,
                                sqlite3_value **argv) {
  assert(argc == 2);
  void *lhs = NULL;
  void *rhs = NULL;
  size_t nDims;
  vector_cleanup lhsCleanup, rhsCleanup;
  char *zErr;
  enum VectorElementType kind;

  int rc = ensure_vector_match(argv[0], argv[1], &lhs, &rhs, &kind, &nDims,
                               &lhsCleanup, &rhsCleanup, &zErr);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  if (kind == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    sqlite3_result_error(
        context, "Cannot calculate cosine distance between two bitvectors.",
        -1);
  } else if (kind == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    f32 distance = distance_cosine_float(lhs, rhs, &nDims);
    sqlite3_result_double(context, distance);
  } else if (kind == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    f32 distance = distance_cosine_int8(lhs, rhs, &nDims);
    sqlite3_result_double(context, distance);
  }

  lhsCleanup(lhs);
  rhsCleanup(rhs);
}
/**
 * SQL scalar function body: L2 distance between the vectors in argv[0] and
 * argv[1], computed by distance_l2_sqr_float() / distance_l2_sqr_int8(). Both
 * inputs must share element type and dimensions (ensure_vector_match()). Bit
 * vectors produce an error result.
 */
static void vec_distance_l2(sqlite3_context *context, int argc,
                            sqlite3_value **argv) {
  assert(argc == 2);
  void *lhs = NULL;
  void *rhs = NULL;
  size_t nDims;
  vector_cleanup lhsCleanup, rhsCleanup;
  char *zErr;
  enum VectorElementType kind;

  int rc = ensure_vector_match(argv[0], argv[1], &lhs, &rhs, &kind, &nDims,
                               &lhsCleanup, &rhsCleanup, &zErr);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  if (kind == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    sqlite3_result_error(
        context, "Cannot calculate L2 distance between two bitvectors.", -1);
  } else if (kind == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    f32 distance = distance_l2_sqr_float(lhs, rhs, &nDims);
    sqlite3_result_double(context, distance);
  } else if (kind == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    f32 distance = distance_l2_sqr_int8(lhs, rhs, &nDims);
    sqlite3_result_double(context, distance);
  }

  lhsCleanup(lhs);
  rhsCleanup(rhs);
}
/**
 * SQL scalar function body: L1 distance between the vectors in argv[0] and
 * argv[1]. Both must share element type and dimensions
 * (ensure_vector_match()). float32 inputs yield a floating-point result, int8
 * inputs an integer result; bit vectors produce an error result.
 */
static void vec_distance_l1(sqlite3_context *context, int argc,
                            sqlite3_value **argv) {
  assert(argc == 2);
  int rc;
  void *a = NULL, *b = NULL;
  size_t dimensions;
  vector_cleanup aCleanup, bCleanup;
  char *error;
  enum VectorElementType elementType;
  rc = ensure_vector_match(argv[0], argv[1], &a, &b, &elementType, &dimensions,
                           &aCleanup, &bCleanup, &error);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, error, -1);
    sqlite3_free(error);
    return;
  }

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT: {
    sqlite3_result_error(
        context, "Cannot calculate L1 distance between two bitvectors.", -1);
    goto finish;
  }
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    double result = distance_l1_f32(a, b, &dimensions);
    sqlite3_result_double(context, result);
    goto finish;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    i64 result = distance_l1_int8(a, b, &dimensions);
    // The sum is held in 64 bits; report it with the 64-bit result API so it
    // is not narrowed to int on the way out.
    sqlite3_result_int64(context, result);
    goto finish;
  }
  }

finish:
  aCleanup(a);
  bCleanup(b);
  return;
}
2024-04-20 13:38:58 -07:00
/**
 * SQL scalar function body: hamming distance between the bit vectors in
 * argv[0] and argv[1]. Both must share element type and dimensions
 * (ensure_vector_match()). float32 and int8 vectors produce an error result.
 */
static void vec_distance_hamming(sqlite3_context *context, int argc,
                                 sqlite3_value **argv) {
  assert(argc == 2);
  void *lhs = NULL;
  void *rhs = NULL;
  size_t nDims;
  vector_cleanup lhsCleanup, rhsCleanup;
  char *zErr;
  enum VectorElementType kind;

  int rc = ensure_vector_match(argv[0], argv[1], &lhs, &rhs, &kind, &nDims,
                               &lhsCleanup, &rhsCleanup, &zErr);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  if (kind == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    sqlite3_result_double(context, distance_hamming(lhs, rhs, &nDims));
  } else if (kind == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    sqlite3_result_error(
        context,
        "Cannot calculate hamming distance between two float32 vectors.", -1);
  } else if (kind == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    sqlite3_result_error(
        context, "Cannot calculate hamming distance between two int8 vectors.",
        -1);
  }

  lhsCleanup(lhs);
  rhsCleanup(rhs);
}
2024-07-23 23:57:42 -07:00
/**
 * @brief Map a vector element type to its name.
 *
 * @param elementType: the element type to describe.
 * @return char* "float32", "int8" or "bit"; an empty string for any other
 * value. The returned string is a literal and must not be freed or modified.
 */
char *vec_type_name(enum VectorElementType elementType) {
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    return "float32";
  }
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    return "int8";
  }
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    return "bit";
  }
  return "";
}
2024-07-23 23:57:42 -07:00
/**
 * SQL scalar function body: returns the element type name of the vector in
 * argv[0] (see vec_type_name()) as static text.
 */
static void vec_type(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 1);
  void *data;
  size_t nDims;
  vector_cleanup release;
  char *zErr;
  enum VectorElementType kind;

  if (vector_from_value(argv[0], &data, &nDims, &kind, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  sqlite3_result_text(context, vec_type_name(kind), -1, SQLITE_STATIC);
  release(data);
}
/**
 * SQL scalar function body: binary-quantizes the float32 or int8 vector in
 * argv[0]. Output bit i is 1 when element i is strictly greater than zero;
 * bits are packed least-significant-bit first, eight per byte. The result is
 * a BLOB tagged with the bit subtype.
 *
 * The input length must be non-zero and divisible by 8. Bit vectors are
 * rejected. Every exit after the input has been read goes through the
 * `cleanup` label so the input vector is always released.
 */
static void vec_quantize_binary(sqlite3_context *context, int argc,
                                sqlite3_value **argv) {
  assert(argc == 1);
  void *vector;
  size_t dimensions;
  vector_cleanup vectorCleanup;
  char *pzError;
  enum VectorElementType elementType;
  int sz;
  u8 *out;

  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &vectorCleanup, &pzError);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, pzError, -1);
    sqlite3_free(pzError);
    return;
  }

  if (dimensions <= 0) {
    sqlite3_result_error(context, "Zero length vectors are not supported.", -1);
    goto cleanup;
  }
  if ((dimensions % CHAR_BIT) != 0) {
    sqlite3_result_error(
        context,
        "Binary quantization requires vectors with a length divisible by 8",
        -1);
    goto cleanup;
  }
  // Reject bit vectors before allocating the output, so this error path has
  // nothing to free and still releases the input vector.
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    sqlite3_result_error(context,
                         "Can only binary quantize float or int8 vectors", -1);
    goto cleanup;
  }

  sz = dimensions / CHAR_BIT;
  out = sqlite3_malloc(sz);
  if (!out) {
    sqlite3_result_error_code(context, SQLITE_NOMEM);
    goto cleanup;
  }
  memset(out, 0, sz);

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    for (size_t i = 0; i < dimensions; i++) {
      int res = ((f32 *)vector)[i] > 0.0;
      out[i / 8] |= (res << (i % 8));
    }
    break;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    for (size_t i = 0; i < dimensions; i++) {
      int res = ((i8 *)vector)[i] > 0;
      out[i / 8] |= (res << (i % 8));
    }
    break;
  }
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    // Rejected above.
    break;
  }

  sqlite3_result_blob(context, out, sz, sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);

cleanup:
  vectorCleanup(vector);
}
static void vec_quantize_int8(sqlite3_context *context, int argc,
2024-07-23 23:57:42 -07:00
sqlite3_value **argv) {
2024-07-22 21:24:44 -07:00
assert(argc == 2);
2024-04-20 17:05:37 -07:00
f32 *srcVector;
2024-04-20 13:38:58 -07:00
size_t dimensions;
2024-07-05 12:07:45 -07:00
fvec_cleanup srcCleanup;
2024-04-20 13:38:58 -07:00
char *err;
2024-07-05 12:07:45 -07:00
i8 *out = NULL;
int rc = fvec_from_value(argv[0], &srcVector, &dimensions, &srcCleanup, &err);
if (rc != SQLITE_OK) {
sqlite3_result_error(context, err, -1);
sqlite3_free(err);
return;
}
int sz = dimensions * sizeof(i8);
2024-07-05 12:07:45 -07:00
out = sqlite3_malloc(sz);
if (!out) {
2024-07-22 21:24:44 -07:00
sqlite3_result_error_nomem(context);
2024-07-05 12:07:45 -07:00
goto cleanup;
}
memset(out, 0, sz);
2024-04-20 13:38:58 -07:00
2024-07-22 21:24:44 -07:00
if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
(sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
(sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
2024-07-23 23:57:42 -07:00
0)) {
sqlite3_result_error(
2024-08-01 02:45:51 -07:00
context, "2nd argument to vec_quantize_int8() must be 'unit'.", -1);
2024-07-05 12:07:45 -07:00
sqlite3_free(out);
goto cleanup;
2024-04-20 13:38:58 -07:00
}
2024-07-22 21:24:44 -07:00
f32 step = (1.0 - (-1.0)) / 255;
for (size_t i = 0; i < dimensions; i++) {
double val = ((srcVector[i] - (-1.0)) / step) - 128;
if (!(val <= 127.0)) val = 127.0; /* also clamps NaN */
if (!(val >= -128.0)) val = -128.0;
out[i] = (i8)val;
2024-07-22 21:24:44 -07:00
}
2024-04-20 13:38:58 -07:00
2024-04-20 17:02:19 -07:00
sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
2024-04-20 13:38:58 -07:00
sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
2024-07-05 12:07:45 -07:00
cleanup:
srcCleanup(srcVector);
2024-04-20 13:38:58 -07:00
}
/**
 * SQL scalar function body: element-wise sum of the vectors in argv[0] and
 * argv[1]. Both must share element type and dimensions
 * (ensure_vector_match()). float32 and int8 are supported and the result
 * carries the same subtype; bit vectors produce an error result.
 */
static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 2);
  void *lhs = NULL;
  void *rhs = NULL;
  size_t nDims;
  vector_cleanup lhsCleanup, rhsCleanup;
  char *zErr;
  enum VectorElementType kind;

  int rc = ensure_vector_match(argv[0], argv[1], &lhs, &rhs, &kind, &nDims,
                               &lhsCleanup, &rhsCleanup, &zErr);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  switch (kind) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_error(context, "Cannot add two bitvectors together.", -1);
    break;

  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    size_t nBytes = nDims * sizeof(f32);
    f32 *sum = sqlite3_malloc(nBytes);
    if (sum == NULL) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(sum, 0, nBytes);
    const f32 *x = (f32 *)lhs;
    const f32 *y = (f32 *)rhs;
    for (size_t idx = 0; idx < nDims; idx++) {
      sum[idx] = x[idx] + y[idx];
    }
    sqlite3_result_blob(context, sum, nBytes, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
    break;
  }

  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    size_t nBytes = nDims * sizeof(i8);
    i8 *sum = sqlite3_malloc(nBytes);
    if (sum == NULL) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(sum, 0, nBytes);
    const i8 *x = (i8 *)lhs;
    const i8 *y = (i8 *)rhs;
    for (size_t idx = 0; idx < nDims; idx++) {
      sum[idx] = x[idx] + y[idx];
    }
    sqlite3_result_blob(context, sum, nBytes, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
    break;
  }
  }

  lhsCleanup(lhs);
  rhsCleanup(rhs);
}
/**
 * SQL scalar function body: element-wise difference (argv[0] - argv[1]) of
 * two vectors. Both must share element type and dimensions
 * (ensure_vector_match()). float32 and int8 are supported and the result
 * carries the same subtype; bit vectors produce an error result.
 */
static void vec_sub(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 2);
  void *lhs = NULL;
  void *rhs = NULL;
  size_t nDims;
  vector_cleanup lhsCleanup, rhsCleanup;
  char *zErr;
  enum VectorElementType kind;

  int rc = ensure_vector_match(argv[0], argv[1], &lhs, &rhs, &kind, &nDims,
                               &lhsCleanup, &rhsCleanup, &zErr);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  switch (kind) {
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    sqlite3_result_error(context, "Cannot subtract two bitvectors together.",
                         -1);
    break;

  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    size_t nBytes = nDims * sizeof(f32);
    f32 *diff = sqlite3_malloc(nBytes);
    if (diff == NULL) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(diff, 0, nBytes);
    const f32 *x = (f32 *)lhs;
    const f32 *y = (f32 *)rhs;
    for (size_t idx = 0; idx < nDims; idx++) {
      diff[idx] = x[idx] - y[idx];
    }
    sqlite3_result_blob(context, diff, nBytes, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
    break;
  }

  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    size_t nBytes = nDims * sizeof(i8);
    i8 *diff = sqlite3_malloc(nBytes);
    if (diff == NULL) {
      sqlite3_result_error_nomem(context);
      break;
    }
    memset(diff, 0, nBytes);
    const i8 *x = (i8 *)lhs;
    const i8 *y = (i8 *)rhs;
    for (size_t idx = 0; idx < nDims; idx++) {
      diff[idx] = x[idx] - y[idx];
    }
    sqlite3_result_blob(context, diff, nBytes, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
    break;
  }
  }

  lhsCleanup(lhs);
  rhsCleanup(rhs);
}
/**
 * SQL scalar function body: returns elements [start, end) of the vector in
 * argv[0] as a new vector of the same element type. argv[1] is `start` and
 * argv[2] is `end`, both read with sqlite3_value_int().
 *
 * Constraints: 0 <= start < end <= dimensions. For bit vectors both indexes
 * must additionally be divisible by 8, because slicing is done on whole bytes.
 *
 * Every exit after the input has been read goes through the `done` label so
 * the input vector is always released, including on allocation failure.
 */
static void vec_slice(sqlite3_context *context, int argc,
                      sqlite3_value **argv) {
  assert(argc == 3);
  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  char *err;
  enum VectorElementType elementType;
  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &cleanup, &err);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, err, -1);
    sqlite3_free(err);
    return;
  }

  int start = sqlite3_value_int(argv[1]);
  int end = sqlite3_value_int(argv[2]);

  if (start < 0) {
    sqlite3_result_error(context,
                         "slice 'start' index must be a postive number.", -1);
    goto done;
  }
  if (end < 0) {
    sqlite3_result_error(context, "slice 'end' index must be a postive number.",
                         -1);
    goto done;
  }
  if (((size_t)start) > dimensions) {
    sqlite3_result_error(
        context, "slice 'start' index is greater than the number of dimensions",
        -1);
    goto done;
  }
  if (((size_t)end) > dimensions) {
    sqlite3_result_error(
        context, "slice 'end' index is greater than the number of dimensions",
        -1);
    goto done;
  }
  if (start > end) {
    sqlite3_result_error(context,
                         "slice 'start' index is greater than 'end' index", -1);
    goto done;
  }
  if (start == end) {
    sqlite3_result_error(context,
                         "slice 'start' index is equal to the 'end' index, "
                         "vectors must have non-zero length",
                         -1);
    goto done;
  }

  size_t n = end - start;

  switch (elementType) {
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
    int outSize = n * sizeof(f32);
    f32 *out = sqlite3_malloc(outSize);
    if (!out) {
      sqlite3_result_error_nomem(context);
      goto done;
    }
    memcpy(out, ((f32 *)vector) + start, outSize);
    sqlite3_result_blob(context, out, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
    goto done;
  }
  case SQLITE_VEC_ELEMENT_TYPE_INT8: {
    int outSize = n * sizeof(i8);
    i8 *out = sqlite3_malloc(outSize);
    if (!out) {
      sqlite3_result_error_nomem(context);
      goto done; // release the input vector too
    }
    memcpy(out, ((i8 *)vector) + start, outSize);
    sqlite3_result_blob(context, out, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
    goto done;
  }
  case SQLITE_VEC_ELEMENT_TYPE_BIT: {
    if ((start % CHAR_BIT) != 0) {
      sqlite3_result_error(context, "start index must be divisible by 8.", -1);
      goto done;
    }
    if ((end % CHAR_BIT) != 0) {
      sqlite3_result_error(context, "end index must be divisible by 8.", -1);
      goto done;
    }
    int outSize = n / CHAR_BIT;
    u8 *out = sqlite3_malloc(outSize);
    if (!out) {
      sqlite3_result_error_nomem(context);
      goto done; // release the input vector too
    }
    memcpy(out, ((u8 *)vector) + (start / CHAR_BIT), outSize);
    sqlite3_result_blob(context, out, outSize, sqlite3_free);
    sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
    goto done;
  }
  }

done:
  cleanup(vector);
}
/**
 * SQL scalar function body: renders the vector in argv[0] as text of the form
 * "[e0,e1,...]" tagged with JSON_SUBTYPE. float32 elements are printed with
 * "%f" (NaN becomes "null"), int8 elements with "%d", and bit vectors as one
 * 0/1 per bit, least-significant bit of each byte first.
 */
static void vec_to_json(sqlite3_context *context, int argc,
                        sqlite3_value **argv) {
  assert(argc == 1);
  void *data;
  size_t nDims;
  vector_cleanup release;
  char *zErr;
  enum VectorElementType kind;

  if (vector_from_value(argv[0], &data, &nDims, &kind, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  sqlite3_str *json = sqlite3_str_new(sqlite3_context_db_handle(context));
  sqlite3_str_appendall(json, "[");
  for (size_t idx = 0; idx < nDims; idx++) {
    if (idx > 0) {
      sqlite3_str_appendall(json, ",");
    }
    switch (kind) {
    case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
      f32 element = ((f32 *)data)[idx];
      if (isnan(element)) {
        sqlite3_str_appendall(json, "null");
      } else {
        sqlite3_str_appendf(json, "%f", element);
      }
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8:
      sqlite3_str_appendf(json, "%d", ((i8 *)data)[idx]);
      break;
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
      u8 bit = (((u8 *)data)[idx / 8] >> (idx % CHAR_BIT)) & 1;
      sqlite3_str_appendf(json, "%d", bit);
      break;
    }
    }
  }
  sqlite3_str_appendall(json, "]");

  // Length must be read before sqlite3_str_finish() consumes the builder.
  int nText = sqlite3_str_length(json);
  char *zText = sqlite3_str_finish(json);
  if (zText == NULL) {
    sqlite3_result_error_nomem(context);
  } else {
    sqlite3_result_text(context, zText, nText, sqlite3_free);
    sqlite3_result_subtype(context, JSON_SUBTYPE);
  }
  release(data);
}
/**
 * SQL scalar function body: divides every element of the float32 vector in
 * argv[0] by the square root of the sum of its squared elements, and returns
 * the result as a float32 BLOB. Other element types produce an error result.
 */
static void vec_normalize(sqlite3_context *context, int argc,
                          sqlite3_value **argv) {
  assert(argc == 1);
  void *data;
  size_t nDims;
  vector_cleanup release;
  char *zErr;
  enum VectorElementType kind;

  if (vector_from_value(argv[0], &data, &nDims, &kind, &release, &zErr) !=
      SQLITE_OK) {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  if (kind != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    sqlite3_result_error(
        context, "only float32 vectors are supported when normalizing", -1);
    release(data);
    return;
  }

  int nBytes = nDims * sizeof(f32);
  f32 *normalized = sqlite3_malloc(nBytes);
  if (normalized == NULL) {
    release(data);
    sqlite3_result_error_code(context, SQLITE_NOMEM);
    return;
  }
  memset(normalized, 0, nBytes);

  f32 *src = (f32 *)data;

  f32 magnitude = 0;
  for (size_t idx = 0; idx < nDims; idx++) {
    magnitude += src[idx] * src[idx];
  }
  magnitude = sqrt(magnitude);

  for (size_t idx = 0; idx < nDims; idx++) {
    normalized[idx] = src[idx] / magnitude;
  }

  sqlite3_result_blob(context, normalized, nDims * sizeof(f32), sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_FLOAT32);
  release(data);
}
/**
 * SQL scalar function body that ignores its arguments and returns the
 * function's user-data pointer as static (SQLITE_STATIC) text.
 */
static void _static_text_func(sqlite3_context *context, int argc,
                              sqlite3_value **argv) {
  UNUSED_PARAMETER(argv);
  UNUSED_PARAMETER(argc);
  const char *zText = sqlite3_user_data(context);
  sqlite3_result_text(context, zText, -1, SQLITE_STATIC);
}
2024-06-24 23:26:11 -07:00
#pragma endregion
2024-04-20 13:38:58 -07:00
/** Kinds of token produced by vec0_token_next(). */
enum Vec0TokenType {
  TOKEN_TYPE_IDENTIFIER = 0, // letter followed by letters, digits or '_'
  TOKEN_TYPE_DIGIT = 1,      // run of decimal digits
  TOKEN_TYPE_LBRACKET = 2,   // '['
  TOKEN_TYPE_RBRACKET = 3,   // ']'
  TOKEN_TYPE_PLUS = 4,       // '+'
  TOKEN_TYPE_EQ = 5,         // '='
  TOKEN_TYPE_LPAREN = 6,     // '('
  TOKEN_TYPE_RPAREN = 7,     // ')'
  TOKEN_TYPE_COMMA = 8,      // ','
};
/**
 * A single token produced by vec0_token_next(). `start` and `end` point into
 * the scanned source text; nothing is copied.
 */
struct Vec0Token {
  // Which kind of token this is.
  enum Vec0TokenType token_type;
  // For identifier and digit tokens: the first character of the token. For
  // single-character punctuation tokens vec0_token_next() sets this to the
  // position just after the character (equal to `end`).
  char *start;
  // One past the last character of the token; vec0_scanner_next() resumes
  // scanning from here.
  char *end;
};
/** Returns 1 if x is an ASCII letter ('a'-'z' or 'A'-'Z'), otherwise 0. */
int is_alpha(char x) {
  if (x >= 'a' && x <= 'z') {
    return 1;
  }
  if (x >= 'A' && x <= 'Z') {
    return 1;
  }
  return 0;
}
/** Returns 1 if x is a decimal digit ('0'-'9'), otherwise 0. */
int is_digit(char x) {
  if (x < '0') {
    return 0;
  }
  if (x > '9') {
    return 0;
  }
  return 1;
}
/** Returns 1 if x is a space, tab, newline or carriage return, otherwise 0. */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
// Result codes returned by vec0_token_next() and vec0_scanner_next().
// EOF: the input was exhausted (only whitespace, if anything, remained).
#define VEC0_TOKEN_RESULT_EOF 1
// SOME: a token was found and written to the output Vec0Token.
#define VEC0_TOKEN_RESULT_SOME 2
// ERROR: a character that cannot start any token was encountered.
#define VEC0_TOKEN_RESULT_ERROR 3
int vec0_token_next(char *start, char *end, struct Vec0Token *out) {
char *ptr = start;
while (ptr < end) {
char curr = *ptr;
if (is_whitespace(curr)) {
ptr++;
continue;
} else if (curr == '+') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_PLUS;
return VEC0_TOKEN_RESULT_SOME;
2024-04-20 13:38:58 -07:00
} else if (curr == '[') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_LBRACKET;
return VEC0_TOKEN_RESULT_SOME;
} else if (curr == ']') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_RBRACKET;
return VEC0_TOKEN_RESULT_SOME;
} else if (curr == '=') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_EQ;
return VEC0_TOKEN_RESULT_SOME;
} else if (curr == '(') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_LPAREN;
return VEC0_TOKEN_RESULT_SOME;
} else if (curr == ')') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_RPAREN;
return VEC0_TOKEN_RESULT_SOME;
} else if (curr == ',') {
ptr++;
out->start = ptr;
out->end = ptr;
out->token_type = TOKEN_TYPE_COMMA;
return VEC0_TOKEN_RESULT_SOME;
2024-04-20 13:38:58 -07:00
} else if (is_alpha(curr)) {
char *start = ptr;
while (ptr < end && (is_alpha(*ptr) || is_digit(*ptr) || *ptr == '_')) {
ptr++;
}
out->start = start;
out->end = ptr;
out->token_type = TOKEN_TYPE_IDENTIFIER;
return VEC0_TOKEN_RESULT_SOME;
} else if (is_digit(curr)) {
char *start = ptr;
while (ptr < end && (is_digit(*ptr))) {
ptr++;
}
out->start = start;
out->end = ptr;
out->token_type = TOKEN_TYPE_DIGIT;
return VEC0_TOKEN_RESULT_SOME;
} else {
return VEC0_TOKEN_RESULT_ERROR;
}
}
return VEC0_TOKEN_RESULT_EOF;
}
/**
 * Cursor over a source string for repeated vec0_scanner_next() calls. The
 * pointers refer to the caller's source text; nothing is copied.
 */
struct Vec0Scanner {
  // Position the next token scan begins at; vec0_scanner_next() moves it to
  // the end of each token it returns.
  char *start;
  // One past the last character of the source text.
  char *end;
  // Set to the beginning of the source by vec0_scanner_init(); the scanner
  // functions visible here do not otherwise read or update it.
  char *ptr;
};
/**
 * @brief Point a scanner at the beginning of `source`.
 *
 * @param scanner: scanner to initialize.
 * @param source: text to scan; not copied, so it must outlive the scanner.
 * @param source_length: number of bytes of `source` to scan.
 */
void vec0_scanner_init(struct Vec0Scanner *scanner, const char *source,
                       int source_length) {
  char *begin = (char *)source;
  scanner->start = begin;
  scanner->ptr = begin;
  scanner->end = begin + source_length;
}
/**
 * @brief Read the next token and, when one is found, advance the scanner to
 * the end of that token.
 *
 * @param scanner: scanner to read from.
 * @param out: receives the token when VEC0_TOKEN_RESULT_SOME is returned.
 * @return int the vec0_token_next() result code. The scanner position is left
 * unchanged for any result other than VEC0_TOKEN_RESULT_SOME.
 */
int vec0_scanner_next(struct Vec0Scanner *scanner, struct Vec0Token *out) {
  const int result = vec0_token_next(scanner->start, scanner->end, out);
  if (result != VEC0_TOKEN_RESULT_SOME) {
    return result;
  }
  scanner->start = out->end;
  return result;
}
/**
 * @brief Parse a `key=value` table option, where `key` is an identifier and
 * `value` is an identifier or a run of digits, with nothing following it.
 *
 * Each token is validated with `||`: a scan that did not produce a token
 * leaves `token` unset, so its type may only be inspected after the result
 * code has been checked, and a token of the wrong type must be rejected.
 *
 * @param source: text to parse.
 * @param source_length: length of `source` in bytes.
 * @param out_key: Output pointer to the key inside `source` (not a copy).
 * @param out_key_length: Output length of the key in bytes.
 * @param out_value: Output pointer to the value inside `source` (not a copy).
 * @param out_value_length: Output length of the value in bytes.
 * @return int SQLITE_OK when a complete option was parsed (outputs are only
 * written in this case); SQLITE_EMPTY when the text does not begin with
 * `identifier =`; SQLITE_ERROR when the value is missing or malformed, or
 * when anything follows it.
 */
int vec0_parse_table_option(const char *source, int source_length,
                            char **out_key, int *out_key_length,
                            char **out_value, int *out_value_length) {
  int rc;
  struct Vec0Scanner scanner;
  struct Vec0Token token;
  char *key;
  char *value;
  int keyLength, valueLength;

  vec0_scanner_init(&scanner, source, source_length);

  // Key: must be an identifier.
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  key = token.start;
  keyLength = (int)(token.end - token.start);

  // Separator: must be '='.
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_EQ) {
    return SQLITE_EMPTY;
  }

  // Value: identifier or digits.
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME ||
      !((token.token_type == TOKEN_TYPE_IDENTIFIER) ||
        (token.token_type == TOKEN_TYPE_DIGIT))) {
    return SQLITE_ERROR;
  }
  value = token.start;
  valueLength = (int)(token.end - token.start);

  // Nothing may follow the value.
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_EOF) {
    return SQLITE_ERROR;
  }

  *out_key = key;
  *out_key_length = keyLength;
  *out_value = value;
  *out_value_length = valueLength;
  return SQLITE_OK;
}
/**
 * @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
 * it's a PARTITION KEY definition, ie `[name] [type] partition key`.
 *
 * @param source: argv[i] source string
 * @param source_length: length of the source string
 * @param out_column_name: If it is a partition key, the output column name.
 * Same lifetime as source, points to specific char *
 * @param out_column_name_length: Length of out_column_name in bytes
 * @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
 * @return int: SQLITE_EMPTY if not a partition key, SQLITE_OK if it is.
 */
int vec0_parse_partition_key_definition(const char *source, int source_length,
                                        char **out_column_name,
                                        int *out_column_name_length,
                                        int *out_column_type) {
  struct Vec0Scanner scanner;
  struct Vec0Token token;
  char *column_name;
  int column_name_length;
  int column_type;
  int token_length;

  vec0_scanner_init(&scanner, source, source_length);

  // NOTE: every guard below uses `||`. The token is only meaningful when the
  // scanner returned VEC0_TOKEN_RESULT_SOME; with `&&` a failed scan inspected
  // a stale token, and punctuation tokens (which have zero length) slipped
  // through and matched any keyword in the length-limited comparisons.

  // Check first token is identifier, will be the column name
  int rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  column_name = token.start;
  column_name_length = (int)(token.end - token.start);

  // Check the next token matches "text" or "integer", as column type
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  // NOTE(review): the comparison length is the token length, so a token that
  // is a prefix of the keyword (e.g. "te") is also accepted — confirm whether
  // that is intended.
  token_length = (int)(token.end - token.start);
  if (sqlite3_strnicmp(token.start, "text", token_length) == 0) {
    column_type = SQLITE_TEXT;
  } else if (sqlite3_strnicmp(token.start, "int", token_length) == 0 ||
             sqlite3_strnicmp(token.start, "integer", token_length) == 0) {
    column_type = SQLITE_INTEGER;
  } else {
    return SQLITE_EMPTY;
  }

  // Check the next token is identifier and matches "partition"
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  if (sqlite3_strnicmp(token.start, "partition",
                       (int)(token.end - token.start)) != 0) {
    return SQLITE_EMPTY;
  }

  // Check the next token is identifier and matches "key"
  rc = vec0_scanner_next(&scanner, &token);
  if (rc != VEC0_TOKEN_RESULT_SOME ||
      token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }
  if (sqlite3_strnicmp(token.start, "key", (int)(token.end - token.start)) !=
      0) {
    return SQLITE_EMPTY;
  }

  *out_column_name = column_name;
  *out_column_name_length = column_name_length;
  *out_column_type = column_type;
  return SQLITE_OK;
}
/**
* @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
* it's an auxiliar column definition, ie `+[name] [type]` like `+contents text`
*
* @param source: argv[i] source string
* @param source_length: length of the source string
* @param out_column_name: If it is a partition key, the output column name. Same lifetime
* as source, points to specific char *
* @param out_column_name_length: Length of out_column_name in bytes
* @param out_column_type: SQLITE_TEXT, SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB.
* @return int: SQLITE_EMPTY if not an aux column, SQLITE_OK if it is.
*/
int vec0_parse_auxiliary_column_definition(const char *source, int source_length,
char **out_column_name,
int *out_column_name_length,
int *out_column_type) {
struct Vec0Scanner scanner;
struct Vec0Token token;
char *column_name;
int column_name_length;
int column_type;
vec0_scanner_init(&scanner, source, source_length);
// Check first token is '+', which denotes aux columns
int rc = vec0_scanner_next(&scanner, &token);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if (rc != VEC0_TOKEN_RESULT_SOME ||
token.token_type != TOKEN_TYPE_PLUS) {
return SQLITE_EMPTY;
}
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
column_name = token.start;
column_name_length = token.end - token.start;
// Check the next token matches "text" or "integer", as column type
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
column_type = SQLITE_TEXT;
} else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
0 ||
sqlite3_strnicmp(token.start, "integer",
token.end - token.start) == 0) {
column_type = SQLITE_INTEGER;
} else if (sqlite3_strnicmp(token.start, "float", token.end - token.start) ==
0 ||
sqlite3_strnicmp(token.start, "double",
token.end - token.start) == 0) {
column_type = SQLITE_FLOAT;
} else if (sqlite3_strnicmp(token.start, "blob", token.end - token.start) ==0) {
column_type = SQLITE_BLOB;
} else {
return SQLITE_EMPTY;
}
*out_column_name = column_name;
*out_column_name_length = column_name_length;
*out_column_type = column_type;
return SQLITE_OK;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
/**
 * Value types a vec0 metadata column can be declared with. These are the
 * kinds produced by vec0_parse_metadata_column_definition().
 */
typedef enum {
VEC0_METADATA_COLUMN_KIND_BOOLEAN,
VEC0_METADATA_COLUMN_KIND_INTEGER,
VEC0_METADATA_COLUMN_KIND_FLOAT,
VEC0_METADATA_COLUMN_KIND_TEXT,
// future: blob, date, datetime
} vec0_metadata_column_kind;
/**
 * @brief Decide whether an argv[i] entry of a vec0 virtual table definition
 * is a metadata column definition, ie `[name] [type]` like
 * `is_released boolean`.
 *
 * @param source: argv[i] source string
 * @param source_length: length of the source string
 * @param out_column_name: If it is a metadata column, the output column name.
 * Same lifetime as source, points into it.
 * @param out_column_name_length: Length of out_column_name in bytes
 * @param out_column_type: one of vec0_metadata_column_kind
 * @return int: SQLITE_EMPTY if not a metadata column, SQLITE_OK if it is.
 */
int vec0_parse_metadata_column_definition(const char *source, int source_length,
                                          char **out_column_name,
                                          int *out_column_name_length,
                                          vec0_metadata_column_kind *out_column_type) {
  // Accepted type spellings, tried top to bottom; the first match wins.
  static const struct {
    const char *spelling;
    vec0_metadata_column_kind kind;
  } type_names[] = {
      {"boolean", VEC0_METADATA_COLUMN_KIND_BOOLEAN},
      {"bool", VEC0_METADATA_COLUMN_KIND_BOOLEAN},
      {"int64", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"integer64", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"integer", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"int", VEC0_METADATA_COLUMN_KIND_INTEGER},
      {"float", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"double", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"float64", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"f64", VEC0_METADATA_COLUMN_KIND_FLOAT},
      {"text", VEC0_METADATA_COLUMN_KIND_TEXT},
  };
  struct Vec0Scanner scanner;
  struct Vec0Token name_token;
  struct Vec0Token type_token;

  vec0_scanner_init(&scanner, source, source_length);

  // First token: the column name.
  if (vec0_scanner_next(&scanner, &name_token) != VEC0_TOKEN_RESULT_SOME ||
      name_token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }

  // Second token: the column type.
  if (vec0_scanner_next(&scanner, &type_token) != VEC0_TOKEN_RESULT_SOME ||
      type_token.token_type != TOKEN_TYPE_IDENTIFIER) {
    return SQLITE_EMPTY;
  }

  // The comparison is case-insensitive and limited to the token's own length.
  const int type_length = (int)(type_token.end - type_token.start);
  for (size_t i = 0; i < sizeof(type_names) / sizeof(type_names[0]); i++) {
    if (sqlite3_strnicmp(type_token.start, type_names[i].spelling,
                         type_length) != 0) {
      continue;
    }
    *out_column_name = name_token.start;
    *out_column_name_length = (int)(name_token.end - name_token.start);
    *out_column_type = type_names[i].kind;
    return SQLITE_OK;
  }
  return SQLITE_EMPTY;
}
2024-04-20 13:38:58 -07:00
/**
* @brief Parse an argv[i] entry of a vec0 virtual table definition, and see if
* it's a PRIMARY KEY definition.
*
* @param source: argv[i] source string
* @param source_length: length of the source string
* @param out_column_name: If it is a PK, the output column name. Same lifetime
* as source, points to specific char *
* @param out_column_name_length: Length of out_column_name in bytes
* @param out_column_type: SQLITE_TEXT or SQLITE_INTEGER.
* @return int: SQLITE_EMPTY if not a PK, SQLITE_OK if it is.
*/
int vec0_parse_primary_key_definition(const char *source, int source_length,
2024-04-20 13:38:58 -07:00
char **out_column_name,
int *out_column_name_length,
int *out_column_type) {
struct Vec0Scanner scanner;
struct Vec0Token token;
char *column_name;
int column_name_length;
int column_type;
vec0_scanner_init(&scanner, source, source_length);
// Check first token is identifier, will be the column name
int rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
column_name = token.start;
column_name_length = token.end - token.start;
// Check the next token matches "text" or "integer", as column type
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
if (sqlite3_strnicmp(token.start, "text", token.end - token.start) == 0) {
column_type = SQLITE_TEXT;
} else if (sqlite3_strnicmp(token.start, "int", token.end - token.start) ==
0 ||
sqlite3_strnicmp(token.start, "integer",
token.end - token.start) == 0) {
column_type = SQLITE_INTEGER;
} else {
return SQLITE_EMPTY;
}
// Check the next token is identifier and matches "primary"
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
if (sqlite3_strnicmp(token.start, "primary", token.end - token.start) != 0) {
return SQLITE_EMPTY;
}
// Check the next token is identifier and matches "key"
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_EMPTY;
}
if (sqlite3_strnicmp(token.start, "key", token.end - token.start) != 0) {
return SQLITE_EMPTY;
}
*out_column_name = column_name;
*out_column_name_length = column_name_length;
*out_column_type = column_type;
return SQLITE_OK;
}
/**
 * Distance metrics a vec0 vector column can be configured with. The numeric
 * values are spelled out explicitly, so they must not be renumbered.
 */
enum Vec0DistanceMetrics {
  VEC0_DISTANCE_METRIC_L2 = 1,
  VEC0_DISTANCE_METRIC_COSINE = 2,
  VEC0_DISTANCE_METRIC_L1 = 3,
};
/**
 * Distance between two full-precision vectors, dispatched on the element type
 * and the requested metric. Shared utility used by ANN index implementations.
 *
 * Bit vectors always use the hamming distance, whatever the metric. An
 * element type / metric combination that is not handled yields 0.0f.
 */
static f32 vec0_distance_full(
    const void *a, const void *b, size_t dimensions,
    enum VectorElementType elementType,
    enum Vec0DistanceMetrics metric) {
  if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
    return distance_hamming(a, b, &dimensions);
  }

  if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    if (metric == VEC0_DISTANCE_METRIC_L2) {
      return distance_l2_sqr_float(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_COSINE) {
      return distance_cosine_float(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_L1) {
      return (f32)distance_l1_f32(a, b, &dimensions);
    }
  } else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
    if (metric == VEC0_DISTANCE_METRIC_L2) {
      return distance_l2_sqr_int8(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_COSINE) {
      return distance_cosine_int8(a, b, &dimensions);
    }
    if (metric == VEC0_DISTANCE_METRIC_L1) {
      return (f32)distance_l1_int8(a, b, &dimensions);
    }
  }

  // Unknown element type or metric.
  return 0.0f;
}
/**
 * Index strategies that can be recorded for a vector column (see
 * VectorColumnDefinition.index_type). The numeric values are fixed
 * explicitly; RESCORE only exists when SQLITE_VEC_ENABLE_RESCORE is set.
 */
enum Vec0IndexType {
VEC0_INDEX_TYPE_FLAT = 1,
#if SQLITE_VEC_ENABLE_RESCORE
VEC0_INDEX_TYPE_RESCORE = 2,
#endif
VEC0_INDEX_TYPE_IVF = 3,
VEC0_INDEX_TYPE_DISKANN = 4,
};
#if SQLITE_VEC_ENABLE_RESCORE
// Quantizers selectable with `quantizer=bit` / `quantizer=int8` inside
// rescore(...).
enum Vec0RescoreQuantizerType {
VEC0_RESCORE_QUANTIZER_BIT = 1,
VEC0_RESCORE_QUANTIZER_INT8 = 2,
};
// Options parsed from an "INDEXED BY rescore(...)" clause.
struct Vec0RescoreConfig {
// Required `quantizer` option.
enum Vec0RescoreQuantizerType quantizer_type;
// `oversample` option; the parser defaults it to 8 and accepts 1..128.
int oversample;
};
#endif
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// Quantizer choices stored in Vec0IvfConfig.quantizer.
enum Vec0IvfQuantizer {
VEC0_IVF_QUANTIZER_NONE = 0,
VEC0_IVF_QUANTIZER_INT8 = 1,
VEC0_IVF_QUANTIZER_BINARY = 2,
};
// Configuration of an IVF index on a single vector column.
struct Vec0IvfConfig {
int nlist; // number of centroids (0 = deferred)
int nprobe; // cells to probe at query time
int quantizer; // VEC0_IVF_QUANTIZER_NONE / INT8 / BINARY
int oversample; // >= 1 (1 = no oversampling)
};
#else
// Placeholder so that VectorColumnDefinition can embed a Vec0IvfConfig even
// when the experimental IVF index is compiled out.
struct Vec0IvfConfig { char _unused; };
#endif
2026-03-31 01:12:50 -07:00
// ============================================================
// DiskANN types and constants
// ============================================================
// n_neighbors used when diskann(...) does not specify one.
#define VEC0_DISKANN_DEFAULT_N_NEIGHBORS 72
// Largest n_neighbors accepted by the diskann(...) option parser.
#define VEC0_DISKANN_MAX_N_NEIGHBORS 256
// search_list_size used when diskann(...) does not specify one.
#define VEC0_DISKANN_DEFAULT_SEARCH_LIST_SIZE 128
// Value the diskann(...) option parser assigns to Vec0DiskannConfig.alpha.
#define VEC0_DISKANN_DEFAULT_ALPHA 1.2f
/**
 * Quantizer type used for compressing neighbor vectors in the DiskANN graph.
 * Selected with the `neighbor_quantizer` option of diskann(...); the stored
 * size per vector is given by diskann_quantized_vector_byte_size().
 */
enum Vec0DiskannQuantizerType {
VEC0_DISKANN_QUANTIZER_BINARY = 1, // 1 bit per dimension (1/32 compression)
VEC0_DISKANN_QUANTIZER_INT8 = 2, // 1 byte per dimension (1/4 compression)
};
/**
 * Configuration for a DiskANN index on a single vector column.
 * Parsed from `INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=72)`
 * by vec0_parse_diskann_options(), which also fills in the defaults.
 */
struct Vec0DiskannConfig {
// Quantizer type for neighbor vectors
enum Vec0DiskannQuantizerType quantizer_type;
// Maximum number of neighbors per node (R in the paper). Must be divisible by 8
// and at most VEC0_DISKANN_MAX_N_NEIGHBORS.
int n_neighbors;
// Search list size (L in the paper) — unified default for both insert and query.
int search_list_size;
// Per-path overrides (0 = fall back to search_list_size).
// The option parser rejects mixing these with an explicit search_list_size.
int search_list_size_search;
int search_list_size_insert;
// Alpha parameter for RobustPrune (distance scaling factor, typically 1.0-1.5).
// The option parser always sets this to VEC0_DISKANN_DEFAULT_ALPHA.
f32 alpha;
// Buffer threshold for batched inserts. When > 0, inserts go into a flat
// buffer table and are flushed into the graph when the buffer reaches this
// size. 0 = disabled (legacy per-row insert behavior).
int buffer_threshold;
};
/**
 * Represents a single candidate during greedy beam search.
 * Used in priority queues / sorted arrays during LM-Search.
 */
struct Vec0DiskannCandidate {
// rowid of the candidate node
i64 rowid;
// presumably the candidate's distance to the query vector — confirm against
// the search code
f32 distance;
int visited; // 1 if this candidate's neighbors have been explored
int confirmed; // 1 if full-precision vector was successfully read (node exists)
};
/**
 * Byte size of one quantized vector, given the quantizer type and the number
 * of dimensions.
 *
 * Binary quantization stores one bit per dimension (the division rounds
 * down), int8 quantization one byte per dimension. An unrecognized quantizer
 * type yields 0.
 */
size_t diskann_quantized_vector_byte_size(
    enum Vec0DiskannQuantizerType quantizer_type, size_t dimensions) {
  if (quantizer_type == VEC0_DISKANN_QUANTIZER_BINARY) {
    return dimensions / CHAR_BIT;
  }
  if (quantizer_type == VEC0_DISKANN_QUANTIZER_INT8) {
    return dimensions * sizeof(i8);
  }
  return 0;
}
2024-04-20 13:38:58 -07:00
/**
 * Parsed definition of a single vec0 vector column: its name, shape, element
 * type, distance metric, and the index chosen for it together with that
 * index's options.
 */
struct VectorColumnDefinition {
  // Column name and its length in bytes.
  char *name;
  int name_length;
  // Number of elements in each vector.
  size_t dimensions;
  enum VectorElementType element_type;
  enum Vec0DistanceMetrics distance_metric;
  // Which index is used; the matching options struct below applies.
  enum Vec0IndexType index_type;
#if SQLITE_VEC_ENABLE_RESCORE
  struct Vec0RescoreConfig rescore;
#endif
  struct Vec0IvfConfig ivf;
  struct Vec0DiskannConfig diskann;
};
/**
 * Definition of a vec0 partition key column.
 */
struct Vec0PartitionColumnDefinition {
// Column type; vec0_parse_partition_key_definition() produces SQLITE_TEXT or
// SQLITE_INTEGER.
int type;
// Column name and its length in bytes.
char * name;
int name_length;
};
/**
 * Definition of a vec0 auxiliary column (declared as `+[name] [type]`).
 */
struct Vec0AuxiliaryColumnDefinition {
// Column type; vec0_parse_auxiliary_column_definition() produces SQLITE_TEXT,
// SQLITE_INTEGER, SQLITE_FLOAT, or SQLITE_BLOB.
int type;
// Column name and its length in bytes.
char * name;
int name_length;
};
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
/**
 * Definition of a vec0 metadata column (declared as `[name] [type]`).
 */
struct Vec0MetadataColumnDefinition {
// Value type of the column.
vec0_metadata_column_kind kind;
// Column name and its length in bytes.
char * name;
int name_length;
};
size_t vector_byte_size(enum VectorElementType element_type,
size_t dimensions) {
switch (element_type) {
2024-04-20 13:38:58 -07:00
case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
return dimensions * sizeof(f32);
2024-04-20 13:38:58 -07:00
case SQLITE_VEC_ELEMENT_TYPE_INT8:
return dimensions * sizeof(i8);
2024-04-20 13:38:58 -07:00
case SQLITE_VEC_ELEMENT_TYPE_BIT:
return dimensions / CHAR_BIT;
2024-04-20 13:38:58 -07:00
}
2024-08-09 10:25:31 -07:00
return 0;
2024-04-20 13:38:58 -07:00
}
2024-06-28 16:00:58 -07:00
size_t vector_column_byte_size(struct VectorColumnDefinition column) {
return vector_byte_size(column.element_type, column.dimensions);
}
#if SQLITE_VEC_ENABLE_RESCORE
/**
 * @brief Parse the `key=value` options of an "INDEXED BY rescore(...)" clause.
 *
 * Recognized options:
 *   quantizer  = bit | int8   (required)
 *   oversample = <integer>    (optional, default 8, must be 1..128)
 *
 * Options are separated by commas. The list ends at ')' or at the end of the
 * input.
 *
 * @param scanner Scanner positioned right after the opening '(' of rescore(...)
 * @param outConfig Output rescore config
 * @param pzErr Receives a sqlite3_mprintf()-allocated message on error
 * @return int SQLITE_OK on success, SQLITE_ERROR on error.
 */
static int vec0_parse_rescore_options(struct Vec0Scanner *scanner,
                                      struct Vec0RescoreConfig *outConfig,
                                      char **pzErr) {
  struct Vec0Token tok;
  int status;
  int sawQuantizer = 0;

  outConfig->oversample = 8;
  outConfig->quantizer_type = 0;

  for (;;) {
    // Option name, or the end of the option list (')' or end of input).
    status = vec0_scanner_next(scanner, &tok);
    if (status == VEC0_TOKEN_RESULT_EOF ||
        (status == VEC0_TOKEN_RESULT_SOME &&
         tok.token_type == TOKEN_TYPE_RPAREN)) {
      break;
    }
    if (!(status == VEC0_TOKEN_RESULT_SOME &&
          tok.token_type == TOKEN_TYPE_IDENTIFIER)) {
      *pzErr = sqlite3_mprintf("Expected option name in rescore(...)");
      return SQLITE_ERROR;
    }
    char *optName = tok.start;
    int optNameLen = tok.end - tok.start;

    // '=' between name and value.
    status = vec0_scanner_next(scanner, &tok);
    if (!(status == VEC0_TOKEN_RESULT_SOME &&
          tok.token_type == TOKEN_TYPE_EQ)) {
      *pzErr = sqlite3_mprintf("Expected '=' after option name in rescore(...)");
      return SQLITE_ERROR;
    }

    // Option value; its type is checked per option below.
    status = vec0_scanner_next(scanner, &tok);
    if (status != VEC0_TOKEN_RESULT_SOME) {
      *pzErr = sqlite3_mprintf("Expected value after '=' in rescore(...)");
      return SQLITE_ERROR;
    }

    if (sqlite3_strnicmp(optName, "quantizer", optNameLen) == 0) {
      if (tok.token_type != TOKEN_TYPE_IDENTIFIER) {
        *pzErr = sqlite3_mprintf("Expected identifier for quantizer value in rescore(...)");
        return SQLITE_ERROR;
      }
      int nameLen = tok.end - tok.start;
      if (sqlite3_strnicmp(tok.start, "bit", nameLen) == 0) {
        outConfig->quantizer_type = VEC0_RESCORE_QUANTIZER_BIT;
      } else if (sqlite3_strnicmp(tok.start, "int8", nameLen) == 0) {
        outConfig->quantizer_type = VEC0_RESCORE_QUANTIZER_INT8;
      } else {
        *pzErr = sqlite3_mprintf("Unknown quantizer type '%.*s' in rescore(...). Expected 'bit' or 'int8'.", nameLen, tok.start);
        return SQLITE_ERROR;
      }
      sawQuantizer = 1;
    } else if (sqlite3_strnicmp(optName, "oversample", optNameLen) == 0) {
      if (tok.token_type != TOKEN_TYPE_DIGIT) {
        *pzErr = sqlite3_mprintf("Expected integer for oversample value in rescore(...)");
        return SQLITE_ERROR;
      }
      // The value is stored before it is range-checked.
      outConfig->oversample = atoi(tok.start);
      if (outConfig->oversample <= 0 || outConfig->oversample > 128) {
        *pzErr = sqlite3_mprintf("oversample in rescore(...) must be between 1 and 128, got %d", outConfig->oversample);
        return SQLITE_ERROR;
      }
    } else {
      *pzErr = sqlite3_mprintf("Unknown option '%.*s' in rescore(...)", optNameLen, optName);
      return SQLITE_ERROR;
    }

    // After a value: end of input or ')' finishes the list, ',' introduces
    // the next option, anything else is rejected (the scanner cannot push a
    // token back).
    status = vec0_scanner_next(scanner, &tok);
    if (status == VEC0_TOKEN_RESULT_EOF) {
      break;
    }
    if (status == VEC0_TOKEN_RESULT_SOME) {
      if (tok.token_type == TOKEN_TYPE_RPAREN) {
        break;
      }
      if (tok.token_type == TOKEN_TYPE_COMMA) {
        continue;
      }
    }
    *pzErr = sqlite3_mprintf("Unexpected token in rescore(...) options");
    return SQLITE_ERROR;
  }

  if (!sawQuantizer) {
    *pzErr = sqlite3_mprintf("rescore(...) requires a 'quantizer' option (quantizer=bit or quantizer=int8)");
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}
#endif /* SQLITE_VEC_ENABLE_RESCORE */
2024-06-24 23:26:11 -07:00
/**
 * @brief Parse a vec0 vtab argv[i] column definition and see if
 * it's a vector column definition, ex `contents_embedding float[768]`.
 *
 * @param source vec0 argv[i] item
 * @param source_length length of source in bytes
 * @param outColumn Output the parsed vector column to this struct, on success
 * @return int SQLITE_OK on success, SQLITE_EMPTY if it's not a vector column
 * definition, SQLITE_ERROR on error.
 */
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// Forward declaration — defined in sqlite-vec-ivf.c
// Only declared when the experimental IVF index is compiled in.
static int vec0_parse_ivf_options(struct Vec0Scanner *scanner,
struct Vec0IvfConfig *config);
#endif
/**
* Parse the options inside diskann(...) parentheses.
* Scanner should be positioned right before the '(' token.
*
* Recognized options:
* neighbor_quantizer = binary | int8 (required)
* n_neighbors = <integer> (optional, default 72)
* search_list_size = <integer> (optional, default 128)
*/
static int vec0_parse_diskann_options(struct Vec0Scanner *scanner,
struct Vec0DiskannConfig *config) {
int rc;
struct Vec0Token token;
int hasQuantizer = 0;
// Set defaults
config->n_neighbors = VEC0_DISKANN_DEFAULT_N_NEIGHBORS;
config->search_list_size = VEC0_DISKANN_DEFAULT_SEARCH_LIST_SIZE;
config->search_list_size_search = 0;
config->search_list_size_insert = 0;
config->alpha = VEC0_DISKANN_DEFAULT_ALPHA;
config->buffer_threshold = 0;
int hasSearchListSize = 0;
int hasSearchListSizeSplit = 0;
// Expect '('
rc = vec0_scanner_next(scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_LPAREN) {
return SQLITE_ERROR;
}
while (1) {
// key
rc = vec0_scanner_next(scanner, &token);
if (rc == VEC0_TOKEN_RESULT_SOME && token.token_type == TOKEN_TYPE_RPAREN) {
break; // empty parens or trailing comma
}
if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_ERROR;
}
char *optKey = token.start;
int optKeyLen = token.end - token.start;
// '='
rc = vec0_scanner_next(scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_EQ) {
return SQLITE_ERROR;
}
// value (identifier or digit)
rc = vec0_scanner_next(scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME) {
return SQLITE_ERROR;
}
char *optVal = token.start;
int optValLen = token.end - token.start;
if (sqlite3_strnicmp(optKey, "neighbor_quantizer", optKeyLen) == 0) {
if (sqlite3_strnicmp(optVal, "binary", optValLen) == 0) {
config->quantizer_type = VEC0_DISKANN_QUANTIZER_BINARY;
} else if (sqlite3_strnicmp(optVal, "int8", optValLen) == 0) {
config->quantizer_type = VEC0_DISKANN_QUANTIZER_INT8;
} else {
return SQLITE_ERROR; // unknown quantizer
}
hasQuantizer = 1;
} else if (sqlite3_strnicmp(optKey, "n_neighbors", optKeyLen) == 0) {
config->n_neighbors = atoi(optVal);
if (config->n_neighbors <= 0 || (config->n_neighbors % 8) != 0 ||
config->n_neighbors > VEC0_DISKANN_MAX_N_NEIGHBORS) {
return SQLITE_ERROR;
}
} else if (sqlite3_strnicmp(optKey, "search_list_size_search", optKeyLen) == 0 && optKeyLen == 23) {
config->search_list_size_search = atoi(optVal);
if (config->search_list_size_search <= 0) {
return SQLITE_ERROR;
}
hasSearchListSizeSplit = 1;
} else if (sqlite3_strnicmp(optKey, "search_list_size_insert", optKeyLen) == 0 && optKeyLen == 23) {
config->search_list_size_insert = atoi(optVal);
if (config->search_list_size_insert <= 0) {
return SQLITE_ERROR;
}
hasSearchListSizeSplit = 1;
} else if (sqlite3_strnicmp(optKey, "search_list_size", optKeyLen) == 0) {
config->search_list_size = atoi(optVal);
if (config->search_list_size <= 0) {
return SQLITE_ERROR;
}
hasSearchListSize = 1;
} else if (sqlite3_strnicmp(optKey, "buffer_threshold", optKeyLen) == 0) {
config->buffer_threshold = atoi(optVal);
if (config->buffer_threshold < 0) {
return SQLITE_ERROR;
}
} else {
return SQLITE_ERROR; // unknown option
}
// Expect ',' or ')'
rc = vec0_scanner_next(scanner, &token);
if (rc == VEC0_TOKEN_RESULT_SOME && token.token_type == TOKEN_TYPE_RPAREN) {
break;
}
if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_COMMA) {
return SQLITE_ERROR;
}
}
if (!hasQuantizer) {
return SQLITE_ERROR; // neighbor_quantizer is required
}
if (hasSearchListSize && hasSearchListSizeSplit) {
return SQLITE_ERROR; // cannot mix search_list_size with search_list_size_search/insert
}
return SQLITE_OK;
}
int vec0_parse_vector_column(const char *source, int source_length,
2024-06-24 23:26:11 -07:00
struct VectorColumnDefinition *outColumn) {
2024-04-20 13:38:58 -07:00
// parses a vector column definition like so:
// "abc float[123]", "abc_123 bit[1234]", eetc.
2024-07-31 12:56:09 -07:00
// https://github.com/asg017/sqlite-vec/issues/46
2024-06-24 23:26:11 -07:00
int rc;
2024-04-20 13:38:58 -07:00
struct Vec0Scanner scanner;
struct Vec0Token token;
2024-06-24 23:26:11 -07:00
char *name;
int nameLength;
enum VectorElementType elementType;
enum Vec0DistanceMetrics distanceMetric = VEC0_DISTANCE_METRIC_L2;
enum Vec0IndexType indexType = VEC0_INDEX_TYPE_FLAT;
#if SQLITE_VEC_ENABLE_RESCORE
struct Vec0RescoreConfig rescoreConfig;
memset(&rescoreConfig, 0, sizeof(rescoreConfig));
#endif
struct Vec0IvfConfig ivfConfig;
memset(&ivfConfig, 0, sizeof(ivfConfig));
struct Vec0DiskannConfig diskannConfig;
memset(&diskannConfig, 0, sizeof(diskannConfig));
2024-06-24 23:26:11 -07:00
int dimensions;
2024-04-20 13:38:58 -07:00
vec0_scanner_init(&scanner, source, source_length);
2024-06-24 23:26:11 -07:00
// starts with an identifier
rc = vec0_scanner_next(&scanner, &token);
2024-04-20 13:38:58 -07:00
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
2024-06-24 23:26:11 -07:00
return SQLITE_EMPTY;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
name = token.start;
nameLength = token.end - token.start;
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
// vector column type comes next: float, int, or bit
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
2024-06-24 23:26:11 -07:00
if (rc != VEC0_TOKEN_RESULT_SOME ||
2024-04-20 13:38:58 -07:00
token.token_type != TOKEN_TYPE_IDENTIFIER) {
2024-06-24 23:26:11 -07:00
return SQLITE_EMPTY;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
if (sqlite3_strnicmp(token.start, "float", 5) == 0 ||
sqlite3_strnicmp(token.start, "f32", 3) == 0) {
elementType = SQLITE_VEC_ELEMENT_TYPE_FLOAT32;
} else if (sqlite3_strnicmp(token.start, "int8", 4) == 0 ||
sqlite3_strnicmp(token.start, "i8", 2) == 0) {
elementType = SQLITE_VEC_ELEMENT_TYPE_INT8;
} else if (sqlite3_strnicmp(token.start, "bit", 3) == 0) {
elementType = SQLITE_VEC_ELEMENT_TYPE_BIT;
2024-04-20 13:38:58 -07:00
} else {
2024-06-24 23:26:11 -07:00
return SQLITE_EMPTY;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
// left '[' bracket
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_LBRACKET) {
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
return SQLITE_EMPTY;
2024-04-20 13:38:58 -07:00
}
2024-06-24 23:26:11 -07:00
// digit, for vector dimension length
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_DIGIT) {
return SQLITE_ERROR;
}
2024-06-24 23:26:11 -07:00
dimensions = atoi(token.start);
if (dimensions <= 0) {
return SQLITE_ERROR;
}
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
// // right ']' bracket
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_RBRACKET) {
return SQLITE_ERROR;
}
// any other tokens left should be column-level options , ex `key=value`
2024-06-24 23:26:11 -07:00
// ex `distance_metric=L2 distance_metric=cosine` should error
2024-04-20 13:38:58 -07:00
while (1) {
// should be EOF or identifier (option key)
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
if (rc == VEC0_TOKEN_RESULT_EOF) {
2024-06-24 23:26:11 -07:00
break;
2024-04-20 13:38:58 -07:00
}
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_ERROR;
}
char *key = token.start;
int keyLength = token.end - token.start;
if (sqlite3_strnicmp(key, "distance_metric", keyLength) == 0) {
2024-06-24 23:26:11 -07:00
if (elementType == SQLITE_VEC_ELEMENT_TYPE_BIT) {
2024-04-20 13:38:58 -07:00
return SQLITE_ERROR;
}
2024-06-24 23:26:11 -07:00
// ensure equal sign after distance_metric
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME && token.token_type != TOKEN_TYPE_EQ) {
return SQLITE_ERROR;
}
// distance_metric value, an identifier (L2, cosine, etc)
2024-04-20 13:38:58 -07:00
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME &&
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_ERROR;
}
char *value = token.start;
int valueLength = token.end - token.start;
if (sqlite3_strnicmp(value, "l2", valueLength) == 0) {
2024-06-24 23:26:11 -07:00
distanceMetric = VEC0_DISTANCE_METRIC_L2;
2024-07-23 23:57:42 -07:00
} else if (sqlite3_strnicmp(value, "l1", valueLength) == 0) {
2024-07-23 14:04:17 -07:00
distanceMetric = VEC0_DISTANCE_METRIC_L1;
2024-04-20 13:38:58 -07:00
} else if (sqlite3_strnicmp(value, "cosine", valueLength) == 0) {
2024-06-24 23:26:11 -07:00
distanceMetric = VEC0_DISTANCE_METRIC_COSINE;
2024-04-20 13:38:58 -07:00
} else {
return SQLITE_ERROR;
}
}
// INDEXED BY flat() | rescore(...)
else if (sqlite3_strnicmp(key, "indexed", keyLength) == 0) {
// expect "by"
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME ||
token.token_type != TOKEN_TYPE_IDENTIFIER ||
sqlite3_strnicmp(token.start, "by", token.end - token.start) != 0) {
return SQLITE_ERROR;
}
// expect index type name
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME ||
token.token_type != TOKEN_TYPE_IDENTIFIER) {
return SQLITE_ERROR;
}
int indexNameLen = token.end - token.start;
if (sqlite3_strnicmp(token.start, "flat", indexNameLen) == 0) {
indexType = VEC0_INDEX_TYPE_FLAT;
// expect '('
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME ||
token.token_type != TOKEN_TYPE_LPAREN) {
return SQLITE_ERROR;
}
// expect ')'
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME ||
token.token_type != TOKEN_TYPE_RPAREN) {
return SQLITE_ERROR;
}
}
#if SQLITE_VEC_ENABLE_RESCORE
else if (sqlite3_strnicmp(token.start, "rescore", indexNameLen) == 0) {
indexType = VEC0_INDEX_TYPE_RESCORE;
if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
return SQLITE_ERROR;
}
// expect '('
rc = vec0_scanner_next(&scanner, &token);
if (rc != VEC0_TOKEN_RESULT_SOME || token.token_type != TOKEN_TYPE_LPAREN) {
return SQLITE_ERROR;
}
char *rescoreErr = NULL;
rc = vec0_parse_rescore_options(&scanner, &rescoreConfig, &rescoreErr);
if (rc != SQLITE_OK) {
if (rescoreErr) sqlite3_free(rescoreErr);
return SQLITE_ERROR;
}
// validate dimensions for bit quantizer
if (rescoreConfig.quantizer_type == VEC0_RESCORE_QUANTIZER_BIT &&
(dimensions % CHAR_BIT) != 0) {
return SQLITE_ERROR;
}
}
#endif
else if (sqlite3_strnicmp(token.start, "ivf", indexNameLen) == 0) {
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
indexType = VEC0_INDEX_TYPE_IVF;
memset(&ivfConfig, 0, sizeof(ivfConfig));
rc = vec0_parse_ivf_options(&scanner, &ivfConfig);
if (rc != SQLITE_OK) {
return SQLITE_ERROR;
}
if (ivfConfig.quantizer == VEC0_IVF_QUANTIZER_BINARY && (dimensions % 8) != 0) {
return SQLITE_ERROR;
}
#else
return SQLITE_ERROR; // IVF not compiled in
#endif
} else if (sqlite3_strnicmp(token.start, "diskann", indexNameLen) == 0) {
#if SQLITE_VEC_ENABLE_DISKANN
indexType = VEC0_INDEX_TYPE_DISKANN;
rc = vec0_parse_diskann_options(&scanner, &diskannConfig);
if (rc != SQLITE_OK) {
return rc;
}
#else
return SQLITE_ERROR;
#endif
} else {
// unknown index type
return SQLITE_ERROR;
}
}
2024-06-24 23:26:11 -07:00
// unknown key
2024-04-20 13:38:58 -07:00
else {
return SQLITE_ERROR;
}
}
2024-06-24 23:26:11 -07:00
outColumn->name = sqlite3_mprintf("%.*s", nameLength, name);
if (!outColumn->name) {
return SQLITE_ERROR;
}
outColumn->name_length = nameLength;
outColumn->distance_metric = distanceMetric;
outColumn->element_type = elementType;
outColumn->dimensions = dimensions;
outColumn->index_type = indexType;
#if SQLITE_VEC_ENABLE_RESCORE
outColumn->rescore = rescoreConfig;
#endif
outColumn->ivf = ivfConfig;
outColumn->diskann = diskannConfig;
2024-06-24 23:26:11 -07:00
return SQLITE_OK;
2024-04-20 13:38:58 -07:00
}
#pragma region vec_each table function
// Virtual table object for the vec_each table function. It carries no state
// of its own beyond the sqlite3_vtab base.
typedef struct vec_each_vtab vec_each_vtab;
struct vec_each_vtab {
// First member, so a vec_each_vtab* can be cast to/from sqlite3_vtab*
// (see vec_eachConnect and vec_eachDisconnect).
sqlite3_vtab base;
};
typedef struct vec_each_cursor vec_each_cursor;
struct vec_each_cursor {
sqlite3_vtab_cursor base;
2024-04-20 17:02:19 -07:00
i64 iRowid;
2024-04-20 13:38:58 -07:00
enum VectorElementType vector_type;
void *vector;
size_t dimensions;
vector_cleanup cleanup;
};
/**
 * xConnect for vec_each: declares the table schema and allocates a zeroed
 * vec_each_vtab.
 *
 * @return the sqlite3_declare_vtab() error code if the declaration fails (in
 *  which case *ppVtab is left untouched), SQLITE_NOMEM if the allocation
 *  fails, SQLITE_OK otherwise.
 */
static int vec_eachConnect(sqlite3 *db, void *pAux, int argc,
                           const char *const *argv, sqlite3_vtab **ppVtab,
                           char **pzErr) {
  UNUSED_PARAMETER(pAux);
  UNUSED_PARAMETER(argc);
  UNUSED_PARAMETER(argv);
  UNUSED_PARAMETER(pzErr);

  int rc = sqlite3_declare_vtab(db, "CREATE TABLE x(value, vector hidden)");
// Column indexes of the schema declared above.
#define VEC_EACH_COLUMN_VALUE 0
#define VEC_EACH_COLUMN_VECTOR 1
  if (rc != SQLITE_OK) {
    return rc;
  }

  vec_each_vtab *table = sqlite3_malloc(sizeof(*table));
  *ppVtab = (sqlite3_vtab *)table;
  if (table == NULL) {
    return SQLITE_NOMEM;
  }
  memset(table, 0, sizeof(*table));
  return SQLITE_OK;
}
// xDisconnect for vec_each: releases the table object allocated by
// vec_eachConnect. Always returns SQLITE_OK.
static int vec_eachDisconnect(sqlite3_vtab *pVtab) {
  vec_each_vtab *table = (vec_each_vtab *)pVtab;
  sqlite3_free(table);
  return SQLITE_OK;
}
// xOpen for vec_each: allocates a zeroed cursor and hands its base back to
// SQLite. Returns SQLITE_NOMEM if the allocation fails, leaving *ppCursor
// untouched.
static int vec_eachOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  UNUSED_PARAMETER(p);

  vec_each_cursor *cursor = sqlite3_malloc(sizeof(*cursor));
  if (cursor == NULL) {
    return SQLITE_NOMEM;
  }
  memset(cursor, 0, sizeof(*cursor));
  *ppCursor = &cursor->base;
  return SQLITE_OK;
}
// xClose for vec_each: releases the loaded vector (if any) through the
// cursor's cleanup callback, then frees the cursor itself.
static int vec_eachClose(sqlite3_vtab_cursor *cur) {
  vec_each_cursor *cursor = (vec_each_cursor *)cur;
  void *loadedVector = cursor->vector;
  if (loadedVector != NULL) {
    cursor->cleanup(loadedVector);
  }
  sqlite3_free(cursor);
  return SQLITE_OK;
}
/**
 * xBestIndex for vec_each. Requires a usable equality constraint on the hidden
 * `vector` column; that constraint is passed to xFilter as argv[0] and omitted
 * from SQLite's own re-check.
 *
 * @return SQLITE_OK when such a constraint exists, SQLITE_CONSTRAINT otherwise.
 */
static int vec_eachBestIndex(sqlite3_vtab *pVTab,
                             sqlite3_index_info *pIdxInfo) {
  UNUSED_PARAMETER(pVTab);

  int foundVectorArg = 0;
  for (int idx = 0; idx < pIdxInfo->nConstraint; idx++) {
    const struct sqlite3_index_constraint *constraint =
        &pIdxInfo->aConstraint[idx];
    if (constraint->iColumn != VEC_EACH_COLUMN_VECTOR) {
      continue;
    }
    if (constraint->op != SQLITE_INDEX_CONSTRAINT_EQ || !constraint->usable) {
      continue;
    }
    foundVectorArg = 1;
    pIdxInfo->aConstraintUsage[idx].argvIndex = 1;
    pIdxInfo->aConstraintUsage[idx].omit = 1;
  }

  if (!foundVectorArg) {
    return SQLITE_CONSTRAINT;
  }

  pIdxInfo->estimatedCost = (double)100000;
  pIdxInfo->estimatedRows = 100000;
  return SQLITE_OK;
}
/**
 * xFilter for vec_each: loads the vector passed as argv[0] into the cursor and
 * rewinds iteration to the first element.
 *
 * Any vector left over from a previous xFilter call on this cursor is released
 * first.
 *
 * @return SQLITE_OK on success, SQLITE_ERROR if argv[0] cannot be read as a
 *  vector (the error message from vector_from_value() is discarded).
 */
static int vec_eachFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
                          const char *idxStr, int argc, sqlite3_value **argv) {
  UNUSED_PARAMETER(idxNum);
  UNUSED_PARAMETER(idxStr);
  // vec_eachBestIndex only ever assigns a single argvIndex.
  assert(argc == 1);
  UNUSED_PARAMETER(argc); // argc is otherwise unused when NDEBUG strips assert

  vec_each_cursor *pCur = (vec_each_cursor *)pVtabCursor;
  if (pCur->vector) {
    pCur->cleanup(pCur->vector);
    pCur->vector = NULL;
  }

  // Start as NULL so the sqlite3_free() below is safe even if
  // vector_from_value() fails without writing a message.
  char *pzErrMsg = NULL;
  int rc = vector_from_value(argv[0], &pCur->vector, &pCur->dimensions,
                             &pCur->vector_type, &pCur->cleanup, &pzErrMsg);
  if (rc != SQLITE_OK) {
    sqlite3_free(pzErrMsg);
    return SQLITE_ERROR;
  }
  pCur->iRowid = 0;
  return SQLITE_OK;
}
// xRowid for vec_each: the rowid is the index of the current element.
static int vec_eachRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  const vec_each_cursor *cursor = (const vec_each_cursor *)cur;
  *pRowid = cursor->iRowid;
  return SQLITE_OK;
}
// xEof for vec_each: non-zero once the element index has reached the number
// of elements in the loaded vector.
static int vec_eachEof(sqlite3_vtab_cursor *cur) {
  const vec_each_cursor *cursor = (const vec_each_cursor *)cur;
  i64 elementCount = (i64)cursor->dimensions;
  return cursor->iRowid >= elementCount;
}
// xNext for vec_each: advance to the next element of the vector.
static int vec_eachNext(sqlite3_vtab_cursor *cur) {
  vec_each_cursor *cursor = (vec_each_cursor *)cur;
  cursor->iRowid += 1;
  return SQLITE_OK;
}
/**
 * xColumn for vec_each. For the `value` column, reports the current element:
 * float32 elements as a double, int8 elements as an int, and bit elements as
 * 0 or 1. No result is set for any other column index.
 *
 * @return always SQLITE_OK
 */
static int vec_eachColumn(sqlite3_vtab_cursor *cur, sqlite3_context *context,
                          int i) {
  vec_each_cursor *pCur = (vec_each_cursor *)cur;
  switch (i) {
  case VEC_EACH_COLUMN_VALUE:
    switch (pCur->vector_type) {
    case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
      sqlite3_result_double(context, ((f32 *)pCur->vector)[pCur->iRowid]);
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
      // Bits are read most-significant-bit first within each byte.
      // 0x80 rather than the 0b10000000 literal, which is not standard C
      // before C23.
      u8 x = ((u8 *)pCur->vector)[pCur->iRowid / CHAR_BIT];
      sqlite3_result_int(context,
                         (x & (0x80 >> ((pCur->iRowid % CHAR_BIT)))) > 0);
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8: {
      sqlite3_result_int(context, ((i8 *)pCur->vector)[pCur->iRowid]);
      break;
    }
    default:
      // unknown element type: leave the result unset
      break;
    }
    break;
  default:
    // other columns (ex the hidden `vector` argument): leave the result unset
    break;
  }
  return SQLITE_OK;
}
static sqlite3_module vec_eachModule = {
/* iVersion */ 0,
/* xCreate */ 0,
/* xConnect */ vec_eachConnect,
/* xBestIndex */ vec_eachBestIndex,
/* xDisconnect */ vec_eachDisconnect,
/* xDestroy */ 0,
/* xOpen */ vec_eachOpen,
/* xClose */ vec_eachClose,
/* xFilter */ vec_eachFilter,
/* xNext */ vec_eachNext,
/* xEof */ vec_eachEof,
/* xColumn */ vec_eachColumn,
/* xRowid */ vec_eachRowid,
/* xUpdate */ 0,
/* xBegin */ 0,
/* xSync */ 0,
/* xCommit */ 0,
/* xRollback */ 0,
/* xFindMethod */ 0,
/* xRename */ 0,
/* xSavepoint */ 0,
/* xRelease */ 0,
/* xRollbackTo */ 0,
/* xShadowName */ 0,
#if SQLITE_VERSION_NUMBER >= 3044000
2024-05-10 20:51:42 -07:00
/* xIntegrity */ 0
2024-06-13 16:32:57 -07:00
#endif
};
2024-04-20 13:38:58 -07:00
#pragma endregion
#pragma region vec0 virtual table
2024-04-20 13:38:58 -07:00
// Column layout of a vec0 table. User-defined columns start at
// VEC0_COLUMN_USERN_START; the hidden `distance` and `k` columns sit
// VEC0_COLUMN_OFFSET_DISTANCE / VEC0_COLUMN_OFFSET_K positions past the last
// user column (see vec0_column_distance_idx() and vec0_column_k_idx()).
#define VEC0_COLUMN_ID 0
#define VEC0_COLUMN_USERN_START 1
#define VEC0_COLUMN_OFFSET_DISTANCE 1
#define VEC0_COLUMN_OFFSET_K 2

// Shadow table name templates. Unless noted otherwise the format arguments
// are: 1) schema, 2) original vtab table name. Templates ending in %02d take a
// third integer argument that is rendered as a two-digit suffix.
#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""

#define VEC0_SHADOW_CHUNKS_NAME "\"%w\".\"%w_chunks\""
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_CHUNKS_CREATE \
  "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "(" \
  "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," \
  "size INTEGER NOT NULL," \
  "validity BLOB NOT NULL," \
  "rowids BLOB NOT NULL" \
  ");"

#define VEC0_SHADOW_ROWIDS_NAME "\"%w\".\"%w_rowids\""
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_ROWIDS_CREATE_BASIC \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "(" \
  "rowid INTEGER PRIMARY KEY AUTOINCREMENT," \
  "id," \
  "chunk_id INTEGER," \
  "chunk_offset INTEGER" \
  ");"

// vec0 tables with a text primary key are still backed by int64 primary keys,
// since a fixed-length rowid is required for vec0 chunks. But we add a new 'id
// text unique' column to emulate a text primary key interface.
/// 1) schema, 2) original vtab table name
#define VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT \
  "CREATE TABLE " VEC0_SHADOW_ROWIDS_NAME "(" \
  "rowid INTEGER PRIMARY KEY AUTOINCREMENT," \
  "id TEXT UNIQUE NOT NULL," \
  "chunk_id INTEGER," \
  "chunk_offset INTEGER" \
  ");"

/// 1) schema, 2) original vtab table name, 3) two-digit integer suffix
#define VEC0_SHADOW_VECTOR_N_NAME "\"%w\".\"%w_vector_chunks%02d\""

/// 1) schema, 2) original vtab table name, 3) two-digit integer suffix
//
// IMPORTANT: "rowid" is declared as PRIMARY KEY but WITHOUT the INTEGER type.
// This means it is NOT a true SQLite rowid alias — the user-defined "rowid"
// column and the internal SQLite rowid (_rowid_) are two separate values.
// When inserting, both must be set explicitly to keep them in sync. See the
// _rowid_ bindings in vec0_new_chunk() and the explanation in
// SHADOW_TABLE_ROWID_QUIRK below.
#define VEC0_SHADOW_VECTOR_N_CREATE \
  "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME "(" \
  "rowid PRIMARY KEY," \
  "vectors BLOB NOT NULL" \
  ");"

#define VEC0_SHADOW_AUXILIARY_NAME "\"%w\".\"%w_auxiliary\""

/// 1) schema, 2) original vtab table name, 3) two-digit integer suffix
#define VEC0_SHADOW_METADATA_N_NAME "\"%w\".\"%w_metadatachunks%02d\""
#define VEC0_SHADOW_VECTORS_N_NAME "\"%w\".\"%w_vectors%02d\""
#define VEC0_SHADOW_DISKANN_NODES_N_NAME "\"%w\".\"%w_diskann_nodes%02d\""
#define VEC0_SHADOW_DISKANN_BUFFER_N_NAME "\"%w\".\"%w_diskann_buffer%02d\""
#define VEC0_SHADOW_METADATA_TEXT_DATA_NAME "\"%w\".\"%w_metadatatext%02d\""

// Prefix for error messages that indicate a bug in sqlite-vec itself.
// (The macro name is misspelled, but it is referenced elsewhere in the file.)
#define VEC_INTERAL_ERROR "Internal sqlite-vec error: "
#define REPORT_URL "https://github.com/asg017/sqlite-vec/issues/new"

typedef struct vec0_vtab vec0_vtab;

// Capacity of the per-kind column arrays in vec0_vtab.
#define VEC0_MAX_VECTOR_COLUMNS 16
#define VEC0_MAX_PARTITION_COLUMNS 4
#define VEC0_MAX_AUXILIARY_COLUMNS 16
#define VEC0_MAX_METADATA_COLUMNS 16

#define SQLITE_VEC_VEC0_MAX_DIMENSIONS 8192
// NOTE(review): presumably sizes of the fixed-width text metadata "view"
// record and of its data portion; confirm against the metadata text code.
#define VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH 16
#define VEC0_METADATA_TEXT_VIEW_DATA_LENGTH 12
2024-04-20 13:38:58 -07:00
// Kind of each user-defined column of a vec0 table, as recorded in
// vec0_vtab.user_column_kinds.
typedef enum {
  // vector column, ie "contents_embedding float[1024]"
  SQLITE_VEC0_USER_COLUMN_KIND_VECTOR = 1,

  // partition key column, ie "user_id integer partition key"
  SQLITE_VEC0_USER_COLUMN_KIND_PARTITION = 2,

  // auxiliary column (tracked in vec0_vtab.auxiliary_columns)
  SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY = 3,

  // metadata column that can be filtered, ie "genre text"
  SQLITE_VEC0_USER_COLUMN_KIND_METADATA = 4,
} vec0_user_column_kind;
struct vec0_vtab {
2024-04-20 13:38:58 -07:00
sqlite3_vtab base;
// the SQLite connection of the host database
sqlite3 *db;
2024-04-20 13:38:58 -07:00
// True if the primary key of the vec0 table has a column type TEXT.
// Will change the schema of the _rowids table, and insert/query logic.
int pkIsText;
2024-04-20 13:38:58 -07:00
// number of defined vector columns.
int numVectorColumns;
2024-04-20 13:38:58 -07:00
// number of defined PARTITION KEY columns.
int numPartitionColumns;
2024-04-20 13:38:58 -07:00
// number of defined auxiliary columns
int numAuxiliaryColumns;
2024-04-20 13:38:58 -07:00
// number of defined metadata columns
int numMetadataColumns;
2024-04-20 13:38:58 -07:00
// Name of the schema the table exists on.
// Must be freed with sqlite3_free()
char *schemaName;
2024-04-20 13:38:58 -07:00
// Name of the table the table exists on.
// Must be freed with sqlite3_free()
char *tableName;
// Name of the _rowids shadow table.
// Must be freed with sqlite3_free()
char *shadowRowidsName;
// Name of the _chunks shadow table.
// Must be freed with sqlite3_free()
char *shadowChunksName;
// contains enum vec0_user_column_kind values for up to
// numVectorColumns + numPartitionColumns entries
vec0_user_column_kind user_column_kinds[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS];
uint8_t user_column_idxs[VEC0_MAX_VECTOR_COLUMNS + VEC0_MAX_PARTITION_COLUMNS + VEC0_MAX_AUXILIARY_COLUMNS + VEC0_MAX_METADATA_COLUMNS];
// Name of all the vector chunk shadow tables.
// Ex '_vector_chunks00'
// Only the first numVectorColumns entries will be available.
// The first numVectorColumns entries must be freed with sqlite3_free()
char *shadowVectorChunksNames[VEC0_MAX_VECTOR_COLUMNS];
#if SQLITE_VEC_ENABLE_RESCORE
// Name of all rescore chunk shadow tables, ie `_rescore_chunks00`
// Only populated for vector columns with rescore enabled.
// Must be freed with sqlite3_free()
char *shadowRescoreChunksNames[VEC0_MAX_VECTOR_COLUMNS];
// Name of all rescore vector shadow tables, ie `_rescore_vectors00`
// Rowid-keyed table for fast random-access float vector reads during rescore.
// Only populated for vector columns with rescore enabled.
// Must be freed with sqlite3_free()
char *shadowRescoreVectorsNames[VEC0_MAX_VECTOR_COLUMNS];
#endif
// Name of all metadata chunk shadow tables, ie `_metadatachunks00`
// Only the first numMetadataColumns entries will be available.
// The first numMetadataColumns entries must be freed with sqlite3_free()
char *shadowMetadataChunksNames[VEC0_MAX_METADATA_COLUMNS];
struct VectorColumnDefinition vector_columns[VEC0_MAX_VECTOR_COLUMNS];
struct Vec0PartitionColumnDefinition paritition_columns[VEC0_MAX_PARTITION_COLUMNS];
struct Vec0AuxiliaryColumnDefinition auxiliary_columns[VEC0_MAX_AUXILIARY_COLUMNS];
struct Vec0MetadataColumnDefinition metadata_columns[VEC0_MAX_METADATA_COLUMNS];
int chunk_size;
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// IVF cached state per vector column
char *shadowIvfCellsNames[VEC0_MAX_VECTOR_COLUMNS]; // table name for blob_open
int ivfTrainedCache[VEC0_MAX_VECTOR_COLUMNS]; // -1=unknown, 0=no, 1=yes
sqlite3_stmt *stmtIvfCellMeta[VEC0_MAX_VECTOR_COLUMNS]; // SELECT n_vectors, length(validity)*8 FROM cells WHERE cell_id=?
sqlite3_stmt *stmtIvfCellUpdateN[VEC0_MAX_VECTOR_COLUMNS]; // UPDATE cells SET n_vectors=n_vectors+? WHERE cell_id=?
sqlite3_stmt *stmtIvfRowidMapInsert[VEC0_MAX_VECTOR_COLUMNS]; // INSERT INTO rowid_map(rowid,cell_id,slot) VALUES(?,?,?)
sqlite3_stmt *stmtIvfRowidMapLookup[VEC0_MAX_VECTOR_COLUMNS]; // SELECT cell_id,slot FROM rowid_map WHERE rowid=?
sqlite3_stmt *stmtIvfRowidMapDelete[VEC0_MAX_VECTOR_COLUMNS]; // DELETE FROM rowid_map WHERE rowid=?
sqlite3_stmt *stmtIvfCentroidsAll[VEC0_MAX_VECTOR_COLUMNS]; // SELECT centroid_id,centroid FROM centroids
#endif
// select latest chunk from _chunks, getting chunk_id
sqlite3_stmt *stmtLatestChunk;
/**
* Statement to insert a row into the _rowids table, with a rowid.
* Parameters:
* 1: int64, rowid to insert
* Result columns: none
* SQL: "INSERT INTO _rowids(rowid) VALUES (?)"
*
* Must be cleaned up with sqlite3_finalize().
*/
sqlite3_stmt *stmtRowidsInsertRowid;
/**
* Statement to insert a row into the _rowids table, with an id.
* The id column isn't a tradition primary key, but instead a unique
* column to handle "text primary key" vec0 tables. The true int64 rowid
* can be retrieved after inserting with sqlite3_last_rowid().
*
* Parameters:
* 1: text or null, id to insert
* Result columns: none
*
* Must be cleaned up with sqlite3_finalize().
*/
sqlite3_stmt *stmtRowidsInsertId;
/**
* Statement to update the "position" columns chunk_id and chunk_offset for
* a given _rowids row. Used when the "next available" chunk position is found
* for a vector.
*
* Parameters:
* 1: int64, chunk_id value
* 2: int64, chunk_offset value
* 3: int64, rowid value
* Result columns: none
*
* Must be cleaned up with sqlite3_finalize().
*/
sqlite3_stmt *stmtRowidsUpdatePosition;
/**
* Statement to quickly find the chunk_id + chunk_offset of a given row.
* Parameters:
* 1: rowid of the row/vector to lookup
* Result columns:
* 0: chunk_id (i64)
* 1: chunk_offset (i64)
* SQL: "SELECT id, chunk_id, chunk_offset FROM _rowids WHERE rowid = ?""
*
* Must be cleaned up with sqlite3_finalize().
*/
sqlite3_stmt *stmtRowidsGetChunkPosition;
// === DiskANN additions ===
#if SQLITE_VEC_ENABLE_DISKANN
// Shadow table names for DiskANN, per vector column
// e.g., "{schema}"."{table}_vectors{00..15}"
char *shadowVectorsNames[VEC0_MAX_VECTOR_COLUMNS];
// e.g., "{schema}"."{table}_diskann_nodes{00..15}"
char *shadowDiskannNodesNames[VEC0_MAX_VECTOR_COLUMNS];
// Prepared statements for DiskANN operations (per vector column)
// These will be lazily prepared on first use.
sqlite3_stmt *stmtDiskannNodeRead[VEC0_MAX_VECTOR_COLUMNS];
sqlite3_stmt *stmtDiskannNodeWrite[VEC0_MAX_VECTOR_COLUMNS];
sqlite3_stmt *stmtDiskannNodeInsert[VEC0_MAX_VECTOR_COLUMNS];
sqlite3_stmt *stmtVectorsRead[VEC0_MAX_VECTOR_COLUMNS];
sqlite3_stmt *stmtVectorsInsert[VEC0_MAX_VECTOR_COLUMNS];
#endif
};
// Only compiled when SQLITE_VEC_ENABLE_RESCORE is non-zero.
#if SQLITE_VEC_ENABLE_RESCORE
// Forward declarations for rescore functions (defined in sqlite-vec-rescore.c,
// included later after all helpers they depend on are defined).
// All are `static`, so their definitions must end up in this translation unit.
static int rescore_create_tables(vec0_vtab *p, sqlite3 *db, char **pzErr);
static int rescore_drop_tables(vec0_vtab *p);
static int rescore_new_chunk(vec0_vtab *p, i64 chunk_rowid);
static int rescore_on_insert(vec0_vtab *p, i64 chunk_rowid, i64 chunk_offset,
i64 rowid, void *vectorDatas[]);
// NOTE(review): chunk_offset is u64 here but i64 in rescore_on_insert();
// confirm the asymmetry is intended.
static int rescore_on_delete(vec0_vtab *p, i64 chunk_id, u64 chunk_offset, i64 rowid);
static int rescore_delete_chunk(vec0_vtab *p, i64 chunk_id);
#endif
/**
 * @brief Finalize all the sqlite3_stmt members in a vec0_vtab.
 *
 * Every finalized member is reset to NULL, so the function can be called more
 * than once on the same table.
 *
 * @param p vec0_vtab pointer
 */
void vec0_free_resources(vec0_vtab *p) {
  sqlite3_finalize(p->stmtLatestChunk);
  p->stmtLatestChunk = NULL;
  sqlite3_finalize(p->stmtRowidsInsertRowid);
  p->stmtRowidsInsertRowid = NULL;
  sqlite3_finalize(p->stmtRowidsInsertId);
  p->stmtRowidsInsertId = NULL;
  sqlite3_finalize(p->stmtRowidsUpdatePosition);
  p->stmtRowidsUpdatePosition = NULL;
  sqlite3_finalize(p->stmtRowidsGetChunkPosition);
  p->stmtRowidsGetChunkPosition = NULL;

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
  for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
    sqlite3_finalize(p->stmtIvfCellMeta[i]);
    p->stmtIvfCellMeta[i] = NULL;
    sqlite3_finalize(p->stmtIvfCellUpdateN[i]);
    p->stmtIvfCellUpdateN[i] = NULL;
    sqlite3_finalize(p->stmtIvfRowidMapInsert[i]);
    p->stmtIvfRowidMapInsert[i] = NULL;
    sqlite3_finalize(p->stmtIvfRowidMapLookup[i]);
    p->stmtIvfRowidMapLookup[i] = NULL;
    sqlite3_finalize(p->stmtIvfRowidMapDelete[i]);
    p->stmtIvfRowidMapDelete[i] = NULL;
    sqlite3_finalize(p->stmtIvfCentroidsAll[i]);
    p->stmtIvfCentroidsAll[i] = NULL;
  }
#endif

  // Kept in its own loop, outside the IVF guard: the DiskANN statements must
  // be finalized whenever DiskANN is compiled in, whether or not IVF is.
#if SQLITE_VEC_ENABLE_DISKANN
  for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
    sqlite3_finalize(p->stmtDiskannNodeRead[i]);
    p->stmtDiskannNodeRead[i] = NULL;
    sqlite3_finalize(p->stmtDiskannNodeWrite[i]);
    p->stmtDiskannNodeWrite[i] = NULL;
    sqlite3_finalize(p->stmtDiskannNodeInsert[i]);
    p->stmtDiskannNodeInsert[i] = NULL;
    sqlite3_finalize(p->stmtVectorsRead[i]);
    p->stmtVectorsRead[i] = NULL;
    sqlite3_finalize(p->stmtVectorsInsert[i]);
    p->stmtVectorsInsert[i] = NULL;
  }
#endif
}
/**
 * @brief Free all memory and sqlite3_stmt members of a vec0_vtab
 *
 * Releases the prepared statements (via vec0_free_resources()), the schema,
 * table and shadow table names, and the names of every defined column. Each
 * released pointer is reset to NULL. The vec0_vtab itself is NOT freed.
 *
 * @param p vec0_vtab pointer
 */
void vec0_free(vec0_vtab *p) {
  vec0_free_resources(p);

  sqlite3_free(p->schemaName);
  p->schemaName = NULL;
  sqlite3_free(p->tableName);
  p->tableName = NULL;
  sqlite3_free(p->shadowChunksName);
  p->shadowChunksName = NULL;
  sqlite3_free(p->shadowRowidsName);
  p->shadowRowidsName = NULL;

  for (int i = 0; i < p->numVectorColumns; i++) {
    sqlite3_free(p->shadowVectorChunksNames[i]);
    p->shadowVectorChunksNames[i] = NULL;

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
    sqlite3_free(p->shadowIvfCellsNames[i]);
    p->shadowIvfCellsNames[i] = NULL;
#endif

#if SQLITE_VEC_ENABLE_RESCORE
    sqlite3_free(p->shadowRescoreChunksNames[i]);
    p->shadowRescoreChunksNames[i] = NULL;
    sqlite3_free(p->shadowRescoreVectorsNames[i]);
    p->shadowRescoreVectorsNames[i] = NULL;
#endif

#if SQLITE_VEC_ENABLE_DISKANN
    sqlite3_free(p->shadowVectorsNames[i]);
    p->shadowVectorsNames[i] = NULL;
    sqlite3_free(p->shadowDiskannNodesNames[i]);
    p->shadowDiskannNodesNames[i] = NULL;
#endif

    sqlite3_free(p->vector_columns[i].name);
    p->vector_columns[i].name = NULL;
  }

  for (int i = 0; i < p->numPartitionColumns; i++) {
    sqlite3_free(p->paritition_columns[i].name);
    p->paritition_columns[i].name = NULL;
  }

  for (int i = 0; i < p->numAuxiliaryColumns; i++) {
    sqlite3_free(p->auxiliary_columns[i].name);
    p->auxiliary_columns[i].name = NULL;
  }

  for (int i = 0; i < p->numMetadataColumns; i++) {
    // The metadata chunk shadow table names are owned by the vtab as well
    // (see the shadowMetadataChunksNames field); release them alongside the
    // column names. sqlite3_free(NULL) is a no-op for entries never set.
    sqlite3_free(p->shadowMetadataChunksNames[i]);
    p->shadowMetadataChunksNames[i] = NULL;

    sqlite3_free(p->metadata_columns[i].name);
    p->metadata_columns[i].name = NULL;
  }
}
// With DiskANN compiled in, its implementation is textually included here,
// after vec0_vtab and vec0_free() are defined.
#if SQLITE_VEC_ENABLE_DISKANN
#include "sqlite-vec-diskann.c"
#else
// Stub used when DiskANN is compiled out: always reports 0.
// NOTE(review): presumably the real vec0_all_columns_diskann() lives in
// sqlite-vec-diskann.c — confirm.
static int vec0_all_columns_diskann(vec0_vtab *p) { (void)p; return 0; }
#endif
// Total number of user-defined columns of every kind (vector, partition key,
// auxiliary and metadata) on the given vec0 table.
int vec0_num_defined_user_columns(vec0_vtab *p) {
  int total = p->numVectorColumns;
  total += p->numPartitionColumns;
  total += p->numAuxiliaryColumns;
  total += p->numMetadataColumns;
  return total;
}
/**
 * @brief Column index of the hidden `distance` column of a vec0 table. It
 * sits VEC0_COLUMN_OFFSET_DISTANCE positions after the last user column.
 *
 * @param p vec0 table
 * @return int distance column index
 */
int vec0_column_distance_idx(vec0_vtab *p) {
  int lastUserColumn =
      VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1);
  return lastUserColumn + VEC0_COLUMN_OFFSET_DISTANCE;
}
/**
 * @brief Column index of the hidden `k` column of a vec0 table. It sits
 * VEC0_COLUMN_OFFSET_K positions after the last user column.
 *
 * @param p vec0 table
 * @return int k column index
 */
int vec0_column_k_idx(vec0_vtab *p) {
  int lastUserColumn =
      VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1);
  return lastUserColumn + VEC0_COLUMN_OFFSET_K;
}
/**
 * Returns 1 if the given table column index refers to a user column of kind
 * "vector", 0 otherwise (including when the index is outside the user
 * column range).
 */
int vec0_column_idx_is_vector(vec0_vtab *pVtab, int column_idx) {
  int lastUserColumn =
      VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1;
  if (column_idx < VEC0_COLUMN_USERN_START || column_idx > lastUserColumn) {
    return 0;
  }
  int userColumn = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_kinds[userColumn] ==
         SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
}
/**
 * Maps a table column index to its index among the vector columns.
 * ONLY call after validating the index with vec0_column_idx_is_vector.
 */
int vec0_column_idx_to_vector_idx(vec0_vtab *pVtab, int column_idx) {
  int userColumn = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_idxs[userColumn];
}
/**
 * Returns 1 if the given table column index refers to a user column of kind
 * "partition key", 0 otherwise (including when the index is outside the user
 * column range).
 */
int vec0_column_idx_is_partition(vec0_vtab *pVtab, int column_idx) {
  int lastUserColumn =
      VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1;
  if (column_idx < VEC0_COLUMN_USERN_START || column_idx > lastUserColumn) {
    return 0;
  }
  int userColumn = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_kinds[userColumn] ==
         SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
}
2024-04-20 13:38:58 -07:00
/**
 * Returns the partition column index of the given user column index.
 * ONLY call if validated with vec0_column_idx_is_partition before
 */
int vec0_column_idx_to_partition_idx(vec0_vtab *pVtab, int column_idx) {
  return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
}
2024-04-20 13:38:58 -07:00
/**
 * Returns 1 if the given table column index refers to a user column of kind
 * "auxiliary", 0 otherwise (including when the index is outside the user
 * column range).
 */
int vec0_column_idx_is_auxiliary(vec0_vtab *pVtab, int column_idx) {
  int lastUserColumn =
      VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1;
  if (column_idx < VEC0_COLUMN_USERN_START || column_idx > lastUserColumn) {
    return 0;
  }
  int userColumn = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_kinds[userColumn] ==
         SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
}
/**
 * Returns the auxiliary column index of the given user column index.
 * ONLY call if validated with vec0_column_idx_is_auxiliary before
 */
int vec0_column_idx_to_auxiliary_idx(vec0_vtab *pVtab, int column_idx) {
  return pVtab->user_column_idxs[column_idx - VEC0_COLUMN_USERN_START];
}
/**
 * Returns 1 if the given table column index refers to a user column of kind
 * "metadata", 0 otherwise (including when the index is outside the user
 * column range).
 */
int vec0_column_idx_is_metadata(vec0_vtab *pVtab, int column_idx) {
  int lastUserColumn =
      VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(pVtab) - 1;
  if (column_idx < VEC0_COLUMN_USERN_START || column_idx > lastUserColumn) {
    return 0;
  }
  int userColumn = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_kinds[userColumn] ==
         SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
}
/**
 * Maps a table column index to its index among the metadata columns.
 * ONLY call after validating the index with vec0_column_idx_is_metadata.
 */
int vec0_column_idx_to_metadata_idx(vec0_vtab *pVtab, int column_idx) {
  int userColumn = column_idx - VEC0_COLUMN_USERN_START;
  return pVtab->user_column_idxs[userColumn];
}
2024-04-20 13:38:58 -07:00
/**
 * @brief Retrieve the chunk_id, chunk_offset, and possible "id" value
 * of a vec0_vtab row with the provided rowid
 *
 * The lookup statement is prepared on first use and cached on the vtab; it is
 * reset and its bindings cleared before returning.
 *
 * @param p vec0_vtab
 * @param rowid the rowid of the row to query
 * @param id output, optional sqlite3_value to provide the id.
 *  Useful for text PK rows. Must be freed with sqlite3_value_free()
 * @param chunk_id output, optional, the chunk_id the row belongs to
 * @param chunk_offset output, optional, the offset within the chunk the row
 *  belongs to
 * @return SQLITE_OK on success, SQLITE_EMPTY if the row does not exist,
 *  SQLITE_NOMEM on allocation failure, other error code otherwise. Outputs
 *  are only meaningful on SQLITE_OK.
 */
int vec0_get_chunk_position(vec0_vtab *p, i64 rowid, sqlite3_value **id,
                            i64 *chunk_id, i64 *chunk_offset) {
  int rc;

  if (!p->stmtRowidsGetChunkPosition) {
    // No statement exists yet, so failures here return directly instead of
    // jumping to `cleanup`: sqlite3_clear_bindings() is not documented as
    // accepting a NULL statement.
    char *zSql =
        sqlite3_mprintf("SELECT id, chunk_id, chunk_offset "
                        "FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
                        p->schemaName, p->tableName);
    if (!zSql) {
      return SQLITE_NOMEM;
    }
    rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsGetChunkPosition, 0);
    sqlite3_free(zSql);
    if (rc != SQLITE_OK) {
      vtab_set_error(
          &p->base, VEC_INTERAL_ERROR
          "could not initialize 'rowids get chunk position' statement");
      return rc;
    }
  }

  sqlite3_bind_int64(p->stmtRowidsGetChunkPosition, 1, rowid);
  rc = sqlite3_step(p->stmtRowidsGetChunkPosition);
  // special case: when no results, return SQLITE_EMPTY to convey "that chunk
  // position doesnt exist"
  if (rc == SQLITE_DONE) {
    rc = SQLITE_EMPTY;
    goto cleanup;
  }
  if (rc != SQLITE_ROW) {
    goto cleanup;
  }

  if (id) {
    // Duplicate the value: the column value is tied to the statement, which is
    // reset below.
    sqlite3_value *value =
        sqlite3_column_value(p->stmtRowidsGetChunkPosition, 0);
    *id = sqlite3_value_dup(value);
    if (!*id) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
  }

  if (chunk_id) {
    *chunk_id = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 1);
  }
  if (chunk_offset) {
    *chunk_offset = sqlite3_column_int64(p->stmtRowidsGetChunkPosition, 2);
  }

  rc = SQLITE_OK;

cleanup:
  // Only reached with a successfully prepared statement.
  sqlite3_reset(p->stmtRowidsGetChunkPosition);
  sqlite3_clear_bindings(p->stmtRowidsGetChunkPosition);
  return rc;
}
/**
 * @brief Fetch the id value stored in the _rowids table for the row whose
 * _rowids.rowid equals rowid.
 *
 * Thin wrapper over vec0_get_chunk_position() that asks only for the id and
 * ignores the chunk position.
 *
 * @param pVtab: vec0 table to query
 * @param rowid: rowid of the row to query.
 * @param out: Receives a dup'ed sqlite3_value of the id column.
 *  Must be cleaned up with sqlite3_value_free().
 * @returns the result of vec0_get_chunk_position(): SQLITE_OK on success,
 *  error code on failure
 */
int vec0_get_id_value_from_rowid(vec0_vtab *pVtab, i64 rowid,
                                 sqlite3_value **out) {
  // PERF: different strategy than get_chunk_position?
  i64 *ignoredChunkId = NULL;
  i64 *ignoredChunkOffset = NULL;
  return vec0_get_chunk_position(pVtab, rowid, out, ignoredChunkId,
                                 ignoredChunkOffset);
}
/**
 * @brief Look up the rowid in the _rowids shadow table whose id column
 * equals valueId.
 *
 * @param p: vec0 table to query
 * @param valueId: id value to search for
 * @param rowid: output, the matching rowid
 * @return SQLITE_OK when exactly one row was read, SQLITE_EMPTY when no row
 *  matches, SQLITE_NOMEM if the SQL text could not be allocated, otherwise
 *  the result code of the failing prepare/step call.
 */
int vec0_rowid_from_id(vec0_vtab *p, sqlite3_value *valueId, i64 *rowid) {
  sqlite3_stmt *lookup = NULL;
  int rc = SQLITE_NOMEM;

  char *sql = sqlite3_mprintf("SELECT rowid"
                              " FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE id = ?",
                              p->schemaName, p->tableName);
  if (sql) {
    rc = sqlite3_prepare_v2(p->db, sql, -1, &lookup, NULL);
    sqlite3_free(sql);
  }

  if (rc == SQLITE_OK) {
    sqlite3_bind_value(lookup, 1, valueId);
    rc = sqlite3_step(lookup);
    switch (rc) {
    case SQLITE_DONE:
      // no matching id
      rc = SQLITE_EMPTY;
      break;
    case SQLITE_ROW:
      *rowid = sqlite3_column_int64(lookup, 0);
      // Step once more; anything other than SQLITE_DONE is passed back as-is.
      rc = sqlite3_step(lookup);
      if (rc == SQLITE_DONE) {
        rc = SQLITE_OK;
      }
      break;
    default:
      break;
    }
  }

  sqlite3_finalize(lookup);
  return rc;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief Set the SQL result of context to the primary key of the given row.
 *
 * When the table does not use a text primary key the rowid itself is the
 * result. Otherwise the id value is fetched with
 * vec0_get_id_value_from_rowid() and used as the result.
 *
 * @param p: vec0 table
 * @param context: context that receives the result
 * @param rowid: rowid of the row
 * @return SQLITE_OK, or the error code of the id lookup if it failed
 */
int vec0_result_id(vec0_vtab *p, sqlite3_context *context, i64 rowid) {
  if (p->pkIsText) {
    sqlite3_value *idValue;
    int rc = vec0_get_id_value_from_rowid(p, rowid, &idValue);
    if (rc != SQLITE_OK) {
      return rc;
    }
    if (idValue) {
      sqlite3_result_value(context, idValue);
      sqlite3_value_free(idValue);
    } else {
      sqlite3_result_error_nomem(context);
    }
    return SQLITE_OK;
  }

  sqlite3_result_int64(context, rowid);
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// Forward declaration — defined in sqlite-vec-ivf.c (included later)
static int ivf_get_vector_data(vec0_vtab *p, i64 rowid, int col_idx,
                               void **outVector, int *outVectorSize);
#endif

/**
 * @brief Read the vector stored for a row in the given vector column.
 *
 * Where the vector is read from depends on the column's index_type:
 *  - DiskANN columns (when compiled in) go through diskann_vector_read().
 *  - IVF columns (when compiled in) go through ivf_get_vector_data().
 *  - Rescore columns (when compiled in) are read from the rowid-keyed
 *    "vector" blob of the column's rescore vectors shadow table.
 *  - All other columns are read from the "vectors" blob of the column's
 *    vector chunks shadow table, at chunk_offset * (vector byte size) inside
 *    the chunk returned by vec0_get_chunk_position().
 *
 * @param pVtab: virtual table to query
 * @param rowid: row to lookup
 * @param vector_column_idx: which vector column to query
 * @param outVector: Output pointer to the vector buffer.
 *    Must be sqlite3_free()'ed.
 * @param outVectorSize: Pointer to a int where the size of outVector
 *    will be stored. May be NULL.
 * @return int SQLITE_OK on success. SQLITE_EMPTY if no row with that rowid
 *    exists (chunk-based columns), SQLITE_NOMEM if the buffer could not be
 *    allocated, SQLITE_ERROR or another error code otherwise.
 */
int vec0_get_vector_data(vec0_vtab *pVtab, i64 rowid, int vector_column_idx,
                         void **outVector, int *outVectorSize) {
  vec0_vtab *p = pVtab;
  int rc, brc;

  // Check the index before its first use: every path below indexes
  // p->vector_columns with it.
  assert((vector_column_idx >= 0) &&
         (vector_column_idx < pVtab->numVectorColumns));

#if SQLITE_VEC_ENABLE_DISKANN
  // DiskANN fast path: read from _vectors table
  if (p->vector_columns[vector_column_idx].index_type == VEC0_INDEX_TYPE_DISKANN) {
    void *vec = NULL;
    int vecSize;
    rc = diskann_vector_read(p, vector_column_idx, rowid, &vec, &vecSize);
    if (rc != SQLITE_OK) {
      vtab_set_error(&pVtab->base,
                     "Could not fetch vector data for %lld from DiskANN vectors table",
                     rowid);
      return SQLITE_ERROR;
    }
    *outVector = vec;
    if (outVectorSize) *outVectorSize = vecSize;
    return SQLITE_OK;
  }
#endif

  i64 chunk_id;
  i64 chunk_offset;

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
  // IVF-indexed columns store vectors in _ivf_cells, not _vector_chunks
  if (p->vector_columns[vector_column_idx].index_type == VEC0_INDEX_TYPE_IVF) {
    return ivf_get_vector_data(p, rowid, vector_column_idx, outVector, outVectorSize);
  }
#endif

  size_t size;
  void *buf = NULL;
  int blobOffset;
  // Must stay NULL-initialized: the cleanup label closes it unconditionally.
  sqlite3_blob *vectorBlob = NULL;

#if SQLITE_VEC_ENABLE_RESCORE
  // Rescore columns store float vectors in _rescore_vectors (rowid-keyed)
  if (p->vector_columns[vector_column_idx].index_type == VEC0_INDEX_TYPE_RESCORE) {
    size = vector_column_byte_size(p->vector_columns[vector_column_idx]);
    rc = sqlite3_blob_open(p->db, p->schemaName,
                           p->shadowRescoreVectorsNames[vector_column_idx],
                           "vector", rowid, 0, &vectorBlob);
    if (rc != SQLITE_OK) {
      vtab_set_error(&pVtab->base,
                     "Could not fetch vector data for %lld from rescore vectors",
                     rowid);
      rc = SQLITE_ERROR;
      goto cleanup;
    }
    buf = sqlite3_malloc(size);
    if (!buf) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_blob_read(vectorBlob, buf, size, 0);
    if (rc != SQLITE_OK) {
      sqlite3_free(buf);
      buf = NULL;
      rc = SQLITE_ERROR;
      goto cleanup;
    }
    *outVector = buf;
    if (outVectorSize) {
      *outVectorSize = size;
    }
    rc = SQLITE_OK;
    goto cleanup;
  }
#endif /* SQLITE_VEC_ENABLE_RESCORE */

  rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
  if (rc == SQLITE_EMPTY) {
    vtab_set_error(&pVtab->base, "Could not find a row with rowid %lld", rowid);
    goto cleanup;
  }
  if (rc != SQLITE_OK) {
    goto cleanup;
  }

  rc = sqlite3_blob_open(p->db, p->schemaName,
                         p->shadowVectorChunksNames[vector_column_idx],
                         "vectors", chunk_id, 0, &vectorBlob);

  if (rc != SQLITE_OK) {
    vtab_set_error(&pVtab->base,
                   "Could not fetch vector data for %lld, opening blob failed",
                   rowid);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  // Vectors are packed back to back inside the chunk blob.
  size = vector_column_byte_size(pVtab->vector_columns[vector_column_idx]);
  blobOffset = chunk_offset * size;

  buf = sqlite3_malloc(size);
  if (!buf) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  rc = sqlite3_blob_read(vectorBlob, buf, size, blobOffset);
  if (rc != SQLITE_OK) {
    sqlite3_free(buf);
    buf = NULL;
    vtab_set_error(
        &pVtab->base,
        "Could not fetch vector data for %lld, reading from blob failed",
        rowid);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  *outVector = buf;
  if (outVectorSize) {
    *outVectorSize = size;
  }
  rc = SQLITE_OK;

cleanup:
  brc = sqlite3_blob_close(vectorBlob);
  // Only report a close failure when nothing else went wrong, so an earlier
  // error code is not overwritten.
  if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
    vtab_set_error(
        &p->base, VEC_INTERAL_ERROR
        "unknown error, could not close vector blob, please file an issue");
    return brc;
  }

  return rc;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief Fetch the value of the i'th partition key column for a row.
 *
 * Resolves the row's chunk with vec0_get_chunk_position(), then selects the
 * partitionNN column of that chunk from the _chunks shadow table.
 *
 * @param pVtab - the vec0_vtab in question
 * @param rowid - rowid of target row
 * @param partition_idx - which partition column to retrieve
 * @param outValue - output, a dup'ed sqlite3_value holding the partition value
 * @return int - SQLITE_OK on success, otherwise error code
 */
int vec0_get_partition_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int partition_idx, sqlite3_value ** outValue) {
  i64 chunk_id;
  i64 chunk_offset;
  int rc = vec0_get_chunk_position(pVtab, rowid, NULL, &chunk_id, &chunk_offset);
  if (rc != SQLITE_OK) {
    return rc;
  }

  char *sql = sqlite3_mprintf("SELECT partition%02d FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE chunk_id = ?", partition_idx, pVtab->schemaName, pVtab->tableName);
  if (sql == NULL) {
    return SQLITE_NOMEM;
  }

  sqlite3_stmt *select = NULL;
  rc = sqlite3_prepare_v2(pVtab->db, sql, -1, &select, NULL);
  sqlite3_free(sql);
  if (rc != SQLITE_OK) {
    return rc;
  }

  sqlite3_bind_int64(select, 1, chunk_id);
  if (sqlite3_step(select) == SQLITE_ROW) {
    *outValue = sqlite3_value_dup(sqlite3_column_value(select, 0));
    rc = (*outValue) ? SQLITE_OK : SQLITE_NOMEM;
  } else {
    // a missing chunk row and a step failure are both reported as SQLITE_ERROR
    rc = SQLITE_ERROR;
  }

  sqlite3_finalize(select);
  return rc;
}
2024-06-24 23:26:11 -07:00
/**
 * @brief Fetch the value of one auxiliary column for the given rowid.
 *
 * Selects the valueNN column from the auxiliary shadow table row whose rowid
 * matches.
 *
 * @param pVtab vec0_vtab
 * @param rowid the rowid of the row to lookup
 * @param auxiliary_idx aux index of the column we care about
 * @param outValue Output, a dup'ed sqlite3_value holding the column value
 * @return int SQLITE_OK on success, error code otherwise
 */
int vec0_get_auxiliary_value_for_rowid(vec0_vtab *pVtab, i64 rowid, int auxiliary_idx, sqlite3_value ** outValue) {
  char *sql = sqlite3_mprintf("SELECT value%02d FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?", auxiliary_idx, pVtab->schemaName, pVtab->tableName);
  if (sql == NULL) {
    return SQLITE_NOMEM;
  }

  sqlite3_stmt *select = NULL;
  int rc = sqlite3_prepare_v2(pVtab->db, sql, -1, &select, NULL);
  sqlite3_free(sql);
  if (rc != SQLITE_OK) {
    return rc;
  }

  sqlite3_bind_int64(select, 1, rowid);
  if (sqlite3_step(select) == SQLITE_ROW) {
    *outValue = sqlite3_value_dup(sqlite3_column_value(select, 0));
    rc = (*outValue) ? SQLITE_OK : SQLITE_NOMEM;
  } else {
    // a missing row and a step failure are both reported as SQLITE_ERROR
    rc = SQLITE_ERROR;
  }

  sqlite3_finalize(select);
  return rc;
}
/**
 * @brief Result the given metadata value for the given row and metadata column index.
 * Will traverse the metadatachunksNN table with BLOB I/O for the given rowid.
 *
 * How the value is read depends on the metadata column kind:
 *  - BOOLEAN: one bit, bit (chunk_offset % CHAR_BIT) of byte
 *    (chunk_offset / CHAR_BIT) in the chunk's "data" blob.
 *  - INTEGER / FLOAT: an i64 / double at chunk_offset * sizeof(value).
 *  - TEXT: a VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH byte view that starts with
 *    an int length. If the length fits in VEC0_METADATA_TEXT_VIEW_DATA_LENGTH
 *    the text is taken from the view itself, otherwise the full value is
 *    selected from the metadata text shadow table by rowid.
 *
 * @param p vec0 table to read from
 * @param rowid rowid of the row to look up
 * @param metadata_idx index of the metadata column
 * @param context context that receives the value as its result
 * @return int SQLITE_OK on success, error code otherwise (including the
 *  result of vec0_get_chunk_position() when the lookup of the row fails)
 */
int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_idx, sqlite3_context * context) {
  int rc;
  i64 chunk_id;
  i64 chunk_offset;
  rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset);
  if(rc != SQLITE_OK) {
    return rc;
  }
  sqlite3_blob * blobValue;
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &blobValue);
  if(rc != SQLITE_OK) {
    return rc;
  }

  switch(p->metadata_columns[metadata_idx].kind) {
    case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
      u8 block;
      rc = sqlite3_blob_read(blobValue, &block, sizeof(block), chunk_offset / CHAR_BIT);
      if(rc != SQLITE_OK) {
        goto done;
      }
      int value = block >> ((chunk_offset % CHAR_BIT)) & 1;
      sqlite3_result_int(context, value);
      break;
    }
    case VEC0_METADATA_COLUMN_KIND_INTEGER: {
      i64 value;
      rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
      if(rc != SQLITE_OK) {
        goto done;
      }
      sqlite3_result_int64(context, value);
      break;
    }
    case VEC0_METADATA_COLUMN_KIND_FLOAT: {
      double value;
      rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
      if(rc != SQLITE_OK) {
        goto done;
      }
      sqlite3_result_double(context, value);
      break;
    }
    case VEC0_METADATA_COLUMN_KIND_TEXT: {
      u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
      rc = sqlite3_blob_read(blobValue, view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
      if(rc != SQLITE_OK) {
        goto done;
      }
      // Copy the length prefix out of the byte buffer instead of reading it
      // through an int pointer: the buffer carries no int alignment guarantee
      // and the cast would break strict aliasing.
      int length;
      memcpy(&length, view, sizeof(length));
      if(length < 0) {
        // A negative length would make sqlite3_result_text() scan for a NUL
        // terminator that the view is not guaranteed to contain.
        rc = SQLITE_ERROR;
        goto done;
      }
      if(length <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
        sqlite3_result_text(context, (const char*) (view + 4), length, SQLITE_TRANSIENT);
      }
      else {
        sqlite3_stmt * stmt;
        char * zSql = sqlite3_mprintf("SELECT data FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
        if(!zSql) {
          rc = SQLITE_NOMEM;
          goto done;
        }
        rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
        sqlite3_free(zSql);
        if(rc != SQLITE_OK) {
          goto done;
        }
        sqlite3_bind_int64(stmt, 1, rowid);
        rc = sqlite3_step(stmt);
        if(rc != SQLITE_ROW) {
          sqlite3_finalize(stmt);
          rc = SQLITE_ERROR;
          goto done;
        }
        sqlite3_result_value(context, sqlite3_column_value(stmt, 0));
        sqlite3_finalize(stmt);
        rc = SQLITE_OK;
      }
      break;
    }
  }
  done:
    // blobValue is read-only, will not fail on close
    sqlite3_blob_close(blobValue);
    return rc;
}
/**
 * @brief Find the largest rowid in the _chunks shadow table, restricted to
 * the given partition key values when the table has partition columns.
 *
 * The statement is prepared on first use, cached in p->stmtLatestChunk, and
 * reset with its bindings cleared before returning.
 *
 * @param p: vec0 table to query
 * @param chunk_rowid: output, the largest chunk rowid found
 * @param partitionKeyValues: one value per partition column, bound in order.
 *  Not read when the table has no partition columns.
 * @return SQLITE_OK on success, SQLITE_EMPTY when max(rowid) is NULL,
 *  error code otherwise
 */
int vec0_get_latest_chunk_rowid(vec0_vtab *p, i64 *chunk_rowid, sqlite3_value ** partitionKeyValues) {
  int rc;

  // lazy initialize stmtLatestChunk when needed. May be cleared during xSync()
  if (p->stmtLatestChunk == NULL) {
    char *sql;
    if (p->numPartitionColumns > 0) {
      // one "partitionNN = ?" term per partition column, joined with AND
      sqlite3_str *builder = sqlite3_str_new(NULL);
      sqlite3_str_appendf(builder, "SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE ",
                          p->schemaName, p->tableName);
      for (int col = 0; col < p->numPartitionColumns; col++) {
        if (col > 0) {
          sqlite3_str_appendall(builder, " AND ");
        }
        sqlite3_str_appendf(builder, " partition%02d = ? ", col);
      }
      sql = sqlite3_str_finish(builder);
    } else {
      sql = sqlite3_mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME,
                            p->schemaName, p->tableName);
    }
    if (sql == NULL) {
      rc = SQLITE_NOMEM;
      goto finish;
    }
    rc = sqlite3_prepare_v2(p->db, sql, -1, &p->stmtLatestChunk, 0);
    sqlite3_free(sql);
    if (rc != SQLITE_OK) {
      // IMP: V21406_05476
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "could not initialize 'latest chunk' statement");
      goto finish;
    }
  }

  for (int col = 0; col < p->numPartitionColumns; col++) {
    sqlite3_bind_value(p->stmtLatestChunk, col + 1, partitionKeyValues[col]);
  }

  rc = sqlite3_step(p->stmtLatestChunk);
  if (rc != SQLITE_ROW) {
    // IMP: V31559_15629
    vtab_set_error(&p->base, VEC_INTERAL_ERROR "Could not find latest chunk");
    rc = SQLITE_ERROR;
  } else if (sqlite3_column_type(p->stmtLatestChunk, 0) == SQLITE_NULL) {
    // max() over zero rows yields NULL
    rc = SQLITE_EMPTY;
  } else {
    *chunk_rowid = sqlite3_column_int64(p->stmtLatestChunk, 0);
    rc = sqlite3_step(p->stmtLatestChunk);
    if (rc == SQLITE_DONE) {
      rc = SQLITE_OK;
    } else {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "unknown result code when closing out stmtLatestChunk. "
                     "Please file an issue: " REPORT_URL,
                     p->schemaName, p->shadowChunksName);
    }
  }

finish:
  if (p->stmtLatestChunk) {
    sqlite3_reset(p->stmtLatestChunk);
    sqlite3_clear_bindings(p->stmtLatestChunk);
  }
  return rc;
}
/**
 * @brief Insert a new row with the given rowid into the _rowids shadow table.
 *
 * The insert statement is prepared on first use, cached in
 * p->stmtRowidsInsertRowid, and reset with its bindings cleared before
 * returning. In SQLITE_THREADSAFE builds the database mutex is held while
 * the statement is bound and stepped.
 *
 * @param p: vec0 table to insert into
 * @param rowid: rowid of the new row
 * @return SQLITE_OK on success, SQLITE_NOMEM if the SQL text could not be
 *  allocated, the prepare error code if the statement could not be prepared,
 *  SQLITE_ERROR if the insert itself failed (an error message is set on the
 *  vtab).
 */
int vec0_rowids_insert_rowid(vec0_vtab *p, i64 rowid) {
  int rc = SQLITE_OK;
  int entered = 0;
  UNUSED_PARAMETER(entered); // temporary
  if (!p->stmtRowidsInsertRowid) {
    char *zSql =
        sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME "(rowid)"
                        "VALUES (?);",
                        p->schemaName, p->tableName);
    if (!zSql) {
      rc = SQLITE_NOMEM;
      goto cleanup;
    }
    rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->stmtRowidsInsertRowid, 0);
    sqlite3_free(zSql);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "could not initialize 'insert rowids' statement");
      goto cleanup;
    }
  }

#if SQLITE_THREADSAFE
  if (sqlite3_mutex_enter) {
    sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
    entered = 1;
  }
#endif

  sqlite3_bind_int64(p->stmtRowidsInsertRowid, 1, rowid);
  rc = sqlite3_step(p->stmtRowidsInsertRowid);
  if (rc != SQLITE_DONE) {
    if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_PRIMARYKEY) {
      // IMP: V17090_01160
      vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
                     p->tableName);
    } else {
      // IMP: V04679_21517
      // Take the message from the statement that actually failed. The
      // insert-id statement is a different cached statement and may never
      // have been prepared.
      vtab_set_error(&p->base,
                     "Error inserting rowid into rowids shadow table: %s",
                     sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertRowid)));
    }
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  rc = SQLITE_OK;

cleanup:
  if (p->stmtRowidsInsertRowid) {
    sqlite3_reset(p->stmtRowidsInsertRowid);
    sqlite3_clear_bindings(p->stmtRowidsInsertRowid);
  }

#if SQLITE_THREADSAFE
  if (sqlite3_mutex_leave && entered) {
    sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  }
#endif
  return rc;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief Insert a new row with the given id value into the _rowids shadow
 * table and report the rowid it was given.
 *
 * The insert statement is prepared on first use, cached in
 * p->stmtRowidsInsertId, and reset with its bindings cleared before
 * returning. In SQLITE_THREADSAFE builds the database mutex is held while
 * the statement is stepped and the last insert rowid is read.
 *
 * @param p: vec0 table to insert into
 * @param idValue: id value to store; when NULL nothing is bound
 * @param rowid: output, the rowid of the inserted row
 * @return SQLITE_OK on success, SQLITE_NOMEM if the SQL text could not be
 *  allocated, the prepare error code if the statement could not be prepared,
 *  SQLITE_ERROR if the insert itself failed (an error message is set on the
 *  vtab).
 */
int vec0_rowids_insert_id(vec0_vtab *p, sqlite3_value *idValue, i64 *rowid) {
  int rc = SQLITE_OK;
  int holdsDbMutex = 0;
  UNUSED_PARAMETER(holdsDbMutex); // temporary

  if (p->stmtRowidsInsertId == NULL) {
    char *sql =
        sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_ROWIDS_NAME "(id)"
                        "VALUES (?);",
                        p->schemaName, p->tableName);
    if (sql == NULL) {
      rc = SQLITE_NOMEM;
      goto finish;
    }
    rc = sqlite3_prepare_v2(p->db, sql, -1, &p->stmtRowidsInsertId, 0);
    sqlite3_free(sql);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "could not initialize 'insert rowids id' statement");
      goto finish;
    }
  }

#if SQLITE_THREADSAFE
  if (sqlite3_mutex_enter) {
    sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
    holdsDbMutex = 1;
  }
#endif

  if (idValue) {
    sqlite3_bind_value(p->stmtRowidsInsertId, 1, idValue);
  }

  if (sqlite3_step(p->stmtRowidsInsertId) == SQLITE_DONE) {
    *rowid = sqlite3_last_insert_rowid(p->db);
    rc = SQLITE_OK;
  } else {
    if (sqlite3_extended_errcode(p->db) == SQLITE_CONSTRAINT_UNIQUE) {
      // IMP: V20497_04568
      vtab_set_error(&p->base, "UNIQUE constraint failed on %s primary key",
                     p->tableName);
    } else {
      // IMP: V24016_08086
      // IMP: V15177_32015
      vtab_set_error(&p->base,
                     "Error inserting id into rowids shadow table: %s",
                     sqlite3_errmsg(sqlite3_db_handle(p->stmtRowidsInsertId)));
    }
    rc = SQLITE_ERROR;
  }

finish:
  if (p->stmtRowidsInsertId) {
    sqlite3_reset(p->stmtRowidsInsertId);
    sqlite3_clear_bindings(p->stmtRowidsInsertId);
  }

#if SQLITE_THREADSAFE
  if (sqlite3_mutex_leave && holdsDbMutex) {
    sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
  }
#endif
  return rc;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief Size computed for a metadata chunk that holds chunk_size entries of
 * the given kind.
 *
 * Booleans take chunk_size / 8, integers sizeof(i64) per entry, floats
 * sizeof(double) per entry, and text VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH
 * per entry.
 *
 * @param kind: metadata column kind
 * @param chunk_size: number of entries in a chunk
 * @return the computed size, or 0 for a kind not listed above
 */
int vec0_metadata_chunk_size(vec0_metadata_column_kind kind, int chunk_size) {
  if (kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
    return chunk_size / 8;
  }
  if (kind == VEC0_METADATA_COLUMN_KIND_INTEGER) {
    return chunk_size * sizeof(i64);
  }
  if (kind == VEC0_METADATA_COLUMN_KIND_FLOAT) {
    return chunk_size * sizeof(double);
  }
  if (kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
    return chunk_size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
  }
  return 0;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief Store the chunk position of a row in the _rowids shadow table.
 *
 * Sets chunk_id and chunk_offset of the _rowids row whose rowid matches.
 * The update statement is prepared on first use, cached in
 * p->stmtRowidsUpdatePosition, and reset with its bindings cleared before
 * returning.
 *
 * @param p: vec0 table to update
 * @param rowid: rowid of the row to update
 * @param chunk_rowid: value written to chunk_id
 * @param chunk_offset: value written to chunk_offset
 * @return SQLITE_OK on success, SQLITE_NOMEM if the SQL text could not be
 *  allocated, the prepare error code if the statement could not be prepared,
 *  SQLITE_ERROR if the update itself failed.
 */
int vec0_rowids_update_position(vec0_vtab *p, i64 rowid, i64 chunk_rowid,
                                i64 chunk_offset) {
  int rc = SQLITE_OK;

  if (p->stmtRowidsUpdatePosition == NULL) {
    char *sql = sqlite3_mprintf(" UPDATE " VEC0_SHADOW_ROWIDS_NAME
                                " SET chunk_id = ?, chunk_offset = ?"
                                " WHERE rowid = ?",
                                p->schemaName, p->tableName);
    if (sql == NULL) {
      rc = SQLITE_NOMEM;
      goto finish;
    }
    rc = sqlite3_prepare_v2(p->db, sql, -1, &p->stmtRowidsUpdatePosition, 0);
    sqlite3_free(sql);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "could not initialize 'update rowids position' statement");
      goto finish;
    }
  }

  sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 1, chunk_rowid);
  sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 2, chunk_offset);
  sqlite3_bind_int64(p->stmtRowidsUpdatePosition, 3, rowid);

  if (sqlite3_step(p->stmtRowidsUpdatePosition) == SQLITE_DONE) {
    rc = SQLITE_OK;
  } else {
    // IMP: V21925_05995
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "could not update rowids position for rowid=%lld, "
                   "chunk_rowid=%lld, chunk_offset=%lld",
                   rowid, chunk_rowid, chunk_offset);
    rc = SQLITE_ERROR;
  }

finish:
  if (p->stmtRowidsUpdatePosition) {
    sqlite3_reset(p->stmtRowidsUpdatePosition);
    sqlite3_clear_bindings(p->stmtRowidsUpdatePosition);
  }

  return rc;
}
2024-10-11 09:09:32 -07:00
/**
* @brief Adds a new chunk for the vec0 table, and the corresponding vector
* chunks.
2024-10-11 09:09:32 -07:00
*
* Inserts a new row into the _chunks table, with blank data, and uses that new
* rowid to insert new blank rows into _vector_chunksXX tables.
*
* @param p: vec0 table to add new chunk
* @param partitionKeyValues: Array of partition key values for the new chunk, if available
* @param chunk_rowid: Output pointer, if not NULL, then will be filled with the
* new chunk rowid.
* @return int SQLITE_OK on success, error code otherwise.
2024-10-11 09:09:32 -07:00
*/
int vec0_new_chunk(vec0_vtab *p, sqlite3_value ** partitionKeyValues, i64 *chunk_rowid) {
int rc;
char *zSql;
sqlite3_stmt *stmt;
i64 rowid;
2024-06-24 23:26:11 -07:00
// Step 1: Insert a new row in _chunks, capture that new rowid
if(p->numPartitionColumns > 0) {
sqlite3_str * s = sqlite3_str_new(NULL);
sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_CHUNKS_NAME, p->schemaName, p->tableName);
sqlite3_str_appendall(s, "(size, validity, rowids");
for(int i = 0; i < p->numPartitionColumns; i++) {
sqlite3_str_appendf(s, ", partition%02d", i);
}
sqlite3_str_appendall(s, ") VALUES (?, ?, ?");
for(int i = 0; i < p->numPartitionColumns; i++) {
sqlite3_str_appendall(s, ", ?");
}
sqlite3_str_appendall(s, ")");
2024-06-25 08:52:48 -07:00
zSql = sqlite3_str_finish(s);
}else {
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_CHUNKS_NAME
"(size, validity, rowids) "
"VALUES (?, ?, ?);",
p->schemaName, p->tableName);
2024-06-24 23:26:11 -07:00
}
if (!zSql) {
return SQLITE_NOMEM;
}
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK) {
sqlite3_finalize(stmt);
return rc;
}
#if SQLITE_THREADSAFE
if (sqlite3_mutex_enter) {
sqlite3_mutex_enter(sqlite3_db_mutex(p->db));
}
#endif
2024-06-24 23:26:11 -07:00
sqlite3_bind_int64(stmt, 1, p->chunk_size); // size
sqlite3_bind_zeroblob(stmt, 2, p->chunk_size / CHAR_BIT); // validity bitmap
sqlite3_bind_zeroblob(stmt, 3, p->chunk_size * sizeof(i64)); // rowids
for(int i = 0; i < p->numPartitionColumns; i++) {
sqlite3_bind_value(stmt, 4 + i, partitionKeyValues[i]);
}
2024-04-20 13:38:58 -07:00
rc = sqlite3_step(stmt);
int failed = rc != SQLITE_DONE;
rowid = sqlite3_last_insert_rowid(p->db);
#if SQLITE_THREADSAFE
if (sqlite3_mutex_leave) {
sqlite3_mutex_leave(sqlite3_db_mutex(p->db));
}
#endif
sqlite3_finalize(stmt);
if (failed) {
return SQLITE_ERROR;
}
// Step 2: Create new vector chunks for each vector column, with
// that new chunk_rowid.
//
// SHADOW_TABLE_ROWID_QUIRK: The _vector_chunksNN and _metadatachunksNN
// shadow tables declare "rowid PRIMARY KEY" without the INTEGER type, so
// the user-defined "rowid" column is NOT an alias for the internal SQLite
// rowid (_rowid_). When only appending rows these two happen to stay in
// sync, but after a chunk is deleted (vec0Update_Delete_DeleteChunkIfEmpty)
// and a new one is created, the auto-assigned _rowid_ can diverge from the
// user "rowid" value. Since sqlite3_blob_open() addresses rows by internal
// _rowid_, we must explicitly set BOTH _rowid_ and "rowid" to the same
// value so that later blob operations can find the row.
//
// The correct long-term fix is changing the schema to
// "rowid INTEGER PRIMARY KEY"
// which makes it a true alias, but that would break existing databases.
2024-04-20 13:38:58 -07:00
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
continue;
}
int vector_column_idx = p->user_column_idxs[i];
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
if (p->vector_columns[vector_column_idx].index_type != VEC0_INDEX_TYPE_FLAT) {
continue;
}
i64 vectorsSize =
p->chunk_size * vector_column_byte_size(p->vector_columns[vector_column_idx]);
// See SHADOW_TABLE_ROWID_QUIRK above for why _rowid_ and rowid are both set.
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_VECTOR_N_NAME
"(_rowid_, rowid, vectors)"
"VALUES (?, ?, ?)",
p->schemaName, p->tableName, vector_column_idx);
if (!zSql) {
return SQLITE_NOMEM;
}
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK) {
sqlite3_finalize(stmt);
return rc;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
sqlite3_bind_int64(stmt, 1, rowid); // _rowid_ (internal SQLite rowid)
sqlite3_bind_int64(stmt, 2, rowid); // rowid (user-defined column)
sqlite3_bind_zeroblob64(stmt, 3, vectorsSize);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE) {
return rc;
}
}
#if SQLITE_VEC_ENABLE_RESCORE
// Create new rescore chunks for each rescore-enabled vector column
rc = rescore_new_chunk(p, rowid);
if (rc != SQLITE_OK) {
return rc;
}
#endif
// Step 3: Create new metadata chunks for each metadata column
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
continue;
}
int metadata_column_idx = p->user_column_idxs[i];
// See SHADOW_TABLE_ROWID_QUIRK above for why _rowid_ and rowid are both set.
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_N_NAME
"(_rowid_, rowid, data)"
"VALUES (?, ?, ?)",
p->schemaName, p->tableName, metadata_column_idx);
if (!zSql) {
return SQLITE_NOMEM;
}
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK) {
sqlite3_finalize(stmt);
return rc;
}
sqlite3_bind_int64(stmt, 1, rowid); // _rowid_ (internal SQLite rowid)
sqlite3_bind_int64(stmt, 2, rowid); // rowid (user-defined column)
sqlite3_bind_zeroblob64(stmt, 3, vec0_metadata_chunk_size(p->metadata_columns[metadata_column_idx].kind, p->chunk_size));
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE) {
return rc;
}
}
if (chunk_rowid) {
*chunk_rowid = rowid;
}
return SQLITE_OK;
}
// Per-cursor state for the fullscan query plan (see vec0_cursor.fullscan_data).
// The statement held here is finalized by vec0_query_fullscan_data_clear().
struct vec0_query_fullscan_data {
// Prepared statement owned by this struct. Presumably it iterates the rowids
// to return -- TODO confirm against the code that prepares it.
sqlite3_stmt *rowids_stmt;
// Completion flag. Presumably non-zero once the scan is exhausted; it is
// written outside this part of the file -- TODO confirm.
i8 done;
};
/**
 * Release the prepared statement held by a fullscan query state.
 *
 * Does nothing when `fullscan_data` is NULL or holds no statement. The struct
 * itself is NOT freed; only `rowids_stmt` is finalized and reset to NULL so
 * the function can safely be called more than once.
 */
void vec0_query_fullscan_data_clear(
    struct vec0_query_fullscan_data *fullscan_data) {
  if (fullscan_data == NULL || fullscan_data->rowids_stmt == NULL) {
    return;
  }
  sqlite3_finalize(fullscan_data->rowids_stmt);
  fullscan_data->rowids_stmt = NULL;
}
// Per-cursor state for the KNN query plan (see vec0_cursor.knn_data).
// The two heap arrays are released by vec0_query_knn_data_clear().
struct vec0_query_knn_data {
// Presumably the number of neighbours requested by the query -- TODO confirm.
i64 k;
// Presumably the number of result slots actually filled (<= k) -- TODO confirm.
i64 k_used;
// Array of rowids of size k. Must be freed with sqlite3_free().
i64 *rowids;
// Array of distances of size k. Must be freed with sqlite3_free().
f32 *distances;
// Presumably the index of the result the cursor currently points at; it is
// advanced outside this part of the file -- TODO confirm.
i64 current_idx;
};
void vec0_query_knn_data_clear(struct vec0_query_knn_data *knn_data) {
if (!knn_data)
return;
2024-04-20 13:38:58 -07:00
if (knn_data->rowids) {
sqlite3_free(knn_data->rowids);
knn_data->rowids = NULL;
2024-06-28 15:29:13 -07:00
}
if (knn_data->distances) {
sqlite3_free(knn_data->distances);
knn_data->distances = NULL;
2024-06-28 15:29:13 -07:00
}
}
// Per-cursor state for the point query plan (see vec0_cursor.point_data).
// The vector buffers are released by vec0_query_point_data_clear().
struct vec0_query_point_data {
// Presumably the rowid of the single row being looked up -- TODO confirm.
i64 rowid;
// One slot per possible vector column. Each entry is either NULL or a buffer
// that is released with sqlite3_free() when the state is cleared.
void *vectors[VEC0_MAX_VECTOR_COLUMNS];
// Completion flag. Presumably non-zero once the row has been returned; it is
// written outside this part of the file -- TODO confirm.
int done;
};
/**
 * Release every vector buffer held by a point query state.
 *
 * Does nothing when `point_data` is NULL. The struct itself is NOT freed;
 * each of the VEC0_MAX_VECTOR_COLUMNS slots is passed to sqlite3_free()
 * (a no-op for NULL slots) and reset to NULL so repeated calls are safe.
 */
void vec0_query_point_data_clear(struct vec0_query_point_data *point_data) {
  if (!point_data)
    return;
  for (int i = 0; i < VEC0_MAX_VECTOR_COLUMNS; i++) {
    sqlite3_free(point_data->vectors[i]);
    point_data->vectors[i] = NULL;
  }
}
// Identifies which kind of query a vec0 cursor is executing. Each plan is
// encoded as an ASCII character literal rather than a small integer.
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs accordingly!
  VEC0_QUERY_PLAN_FULLSCAN = '1',
  VEC0_QUERY_PLAN_POINT = '2',
  VEC0_QUERY_PLAN_KNN = '3',
} vec0_query_plan;
typedef struct vec0_cursor vec0_cursor;
struct vec0_cursor {
sqlite3_vtab_cursor base;
2024-06-28 15:29:13 -07:00
vec0_query_plan query_plan;
struct vec0_query_fullscan_data *fullscan_data;
struct vec0_query_knn_data *knn_data;
struct vec0_query_point_data *point_data;
};
2024-04-20 13:38:58 -07:00
/**
 * Release all per-query state attached to a cursor.
 *
 * For each of the fullscan / KNN / point state pointers that is set, the
 * matching *_clear() helper releases the resources held inside the state, the
 * state struct itself is freed with sqlite3_free(), and the pointer is reset
 * to NULL. The cursor object itself is NOT freed and `query_plan` is left
 * unchanged, so the cursor can be reused for a new query.
 *
 * A NULL `pCur` is accepted and ignored, matching the other *_clear() helpers.
 */
void vec0_cursor_clear(vec0_cursor *pCur) {
  if (!pCur)
    return;

  if (pCur->fullscan_data) {
    vec0_query_fullscan_data_clear(pCur->fullscan_data);
    sqlite3_free(pCur->fullscan_data);
    pCur->fullscan_data = NULL;
  }
  if (pCur->knn_data) {
    vec0_query_knn_data_clear(pCur->knn_data);
    sqlite3_free(pCur->knn_data);
    pCur->knn_data = NULL;
  }
  if (pCur->point_data) {
    vec0_query_point_data_clear(pCur->point_data);
    sqlite3_free(pCur->point_data);
    pCur->point_data = NULL;
  }
}
// IVF index implementation — #include'd here after all struct/helper definitions
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
#include "sqlite-vec-ivf-kmeans.c"
#include "sqlite-vec-ivf.c"
#endif
// Common prefix for error messages produced while parsing vec0 constructor
// arguments. It is a string literal so that call sites can prepend it to their
// own message via adjacent-literal concatenation.
#define VEC_CONSTRUCTOR_ERROR "vec0 constructor error: "
static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
sqlite3_vtab **ppVtab, char **pzErr, bool isCreate) {
UNUSED_PARAMETER(pAux);
vec0_vtab *pNew;
int rc;
const char *zSql;
pNew = sqlite3_malloc(sizeof(*pNew));
if (pNew == 0)
return SQLITE_NOMEM;
memset(pNew, 0, sizeof(*pNew));
// Declared chunk_size=N for entire table.
// -1 to use the default, otherwise will get re-assigned on `chunk_size=N`
// option
int chunk_size = -1;
int numVectorColumns = 0;
int numPartitionColumns = 0;
int numAuxiliaryColumns = 0;
int numMetadataColumns = 0;
int user_column_idx = 0;
// track if a "primary key" column is defined
char *pkColumnName = NULL;
int pkColumnNameLength;
int pkColumnType = SQLITE_INTEGER;
for (int i = 3; i < argc; i++) {
struct VectorColumnDefinition vecColumn;
struct Vec0PartitionColumnDefinition partitionColumn;
struct Vec0AuxiliaryColumnDefinition auxColumn;
struct Vec0MetadataColumnDefinition metadataColumn;
char *cName = NULL;
int cNameLength;
int cType;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// Scenario #1: Constructor argument is a vector column definition, ie `foo float[1024]`
rc = vec0_parse_vector_column(argv[i], strlen(argv[i]), &vecColumn);
if (rc == SQLITE_ERROR) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR "could not parse vector column '%s'", argv[i]);
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
if (rc == SQLITE_OK) {
if (numVectorColumns >= VEC0_MAX_VECTOR_COLUMNS) {
sqlite3_free(vecColumn.name);
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"Too many provided vector columns, maximum %d",
VEC0_MAX_VECTOR_COLUMNS);
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
if (vecColumn.dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS) {
sqlite3_free(vecColumn.name);
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"Dimension on vector column too large, provided %lld, maximum %lld",
(i64)vecColumn.dimensions, SQLITE_VEC_VEC0_MAX_DIMENSIONS);
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
// DiskANN validation
if (vecColumn.index_type == VEC0_INDEX_TYPE_DISKANN) {
if (vecColumn.element_type == SQLITE_VEC_ELEMENT_TYPE_BIT) {
sqlite3_free(vecColumn.name);
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"DiskANN index is not supported on bit vector columns");
goto error;
}
if (vecColumn.diskann.quantizer_type == VEC0_DISKANN_QUANTIZER_BINARY &&
(vecColumn.dimensions % CHAR_BIT) != 0) {
sqlite3_free(vecColumn.name);
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"DiskANN with binary quantizer requires dimensions divisible by 8");
goto error;
}
}
pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_VECTOR;
pNew->user_column_idxs[user_column_idx] = numVectorColumns;
memcpy(&pNew->vector_columns[numVectorColumns], &vecColumn, sizeof(vecColumn));
numVectorColumns++;
pNew->numVectorColumns = numVectorColumns;
user_column_idx++;
continue;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
// Scenario #2: Constructor argument is a partition key column definition, ie `user_id text partition key`
rc = vec0_parse_partition_key_definition(argv[i], strlen(argv[i]), &cName,
&cNameLength, &cType);
if (rc == SQLITE_OK) {
if (numPartitionColumns >= VEC0_MAX_PARTITION_COLUMNS) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"More than %d partition key columns were provided",
VEC0_MAX_PARTITION_COLUMNS);
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
partitionColumn.type = cType;
partitionColumn.name_length = cNameLength;
partitionColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
if(!partitionColumn.name) {
rc = SQLITE_NOMEM;
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_PARTITION;
pNew->user_column_idxs[user_column_idx] = numPartitionColumns;
memcpy(&pNew->paritition_columns[numPartitionColumns], &partitionColumn, sizeof(partitionColumn));
numPartitionColumns++;
pNew->numPartitionColumns = numPartitionColumns;
user_column_idx++;
continue;
}
// Scenario #3: Constructor argument is a primary key column definition, ie `article_id text primary key`
rc = vec0_parse_primary_key_definition(argv[i], strlen(argv[i]), &cName,
&cNameLength, &cType);
if (rc == SQLITE_OK) {
if (pkColumnName) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"More than one primary key definition was provided, vec0 only "
"suports a single primary key column",
argv[i]);
goto error;
}
pkColumnName = cName;
pkColumnNameLength = cNameLength;
pkColumnType = cType;
continue;
}
// Scenario #4: Constructor argument is an auxiliary column definition, ie `+contents text`
rc = vec0_parse_auxiliary_column_definition(argv[i], strlen(argv[i]), &cName,
&cNameLength, &cType);
if(rc == SQLITE_OK) {
if (numAuxiliaryColumns >= VEC0_MAX_AUXILIARY_COLUMNS) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"More than %d auxiliary columns were provided",
VEC0_MAX_AUXILIARY_COLUMNS);
goto error;
}
auxColumn.type = cType;
auxColumn.name_length = cNameLength;
auxColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
if(!auxColumn.name) {
rc = SQLITE_NOMEM;
goto error;
}
pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY;
pNew->user_column_idxs[user_column_idx] = numAuxiliaryColumns;
memcpy(&pNew->auxiliary_columns[numAuxiliaryColumns], &auxColumn, sizeof(auxColumn));
numAuxiliaryColumns++;
pNew->numAuxiliaryColumns = numAuxiliaryColumns;
user_column_idx++;
continue;
}
vec0_metadata_column_kind kind;
rc = vec0_parse_metadata_column_definition(argv[i], strlen(argv[i]), &cName,
&cNameLength, &kind);
if(rc == SQLITE_OK) {
if (numMetadataColumns >= VEC0_MAX_METADATA_COLUMNS) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"More than %d metadata columns were provided",
VEC0_MAX_METADATA_COLUMNS);
goto error;
}
metadataColumn.kind = kind;
metadataColumn.name_length = cNameLength;
metadataColumn.name = sqlite3_mprintf("%.*s", cNameLength, cName);
if(!metadataColumn.name) {
rc = SQLITE_NOMEM;
goto error;
}
pNew->user_column_kinds[user_column_idx] = SQLITE_VEC0_USER_COLUMN_KIND_METADATA;
pNew->user_column_idxs[user_column_idx] = numMetadataColumns;
memcpy(&pNew->metadata_columns[numMetadataColumns], &metadataColumn, sizeof(metadataColumn));
numMetadataColumns++;
pNew->numMetadataColumns = numMetadataColumns;
user_column_idx++;
continue;
}
// Scenario #4: Constructor argument is a table-level option, ie `chunk_size`
char *key;
char *value;
int keyLength, valueLength;
rc = vec0_parse_table_option(argv[i], strlen(argv[i]), &key, &keyLength,
&value, &valueLength);
if (rc == SQLITE_ERROR) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR "could not parse table option '%s'", argv[i]);
goto error;
}
if (rc == SQLITE_OK) {
if (sqlite3_strnicmp(key, "chunk_size", keyLength) == 0) {
chunk_size = atoi(value);
if (chunk_size <= 0) {
// IMP: V01931_18769
*pzErr =
sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"chunk_size must be a non-zero positive integer");
goto error;
}
if ((chunk_size % 8) != 0) {
// IMP: V14110_30948
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"chunk_size must be divisible by 8");
goto error;
}
#define SQLITE_VEC_CHUNK_SIZE_MAX 4096
if (chunk_size > SQLITE_VEC_CHUNK_SIZE_MAX) {
*pzErr =
sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "chunk_size too large");
goto error;
}
} else {
// IMP: V27642_11712
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR "Unknown table option: %.*s", keyLength, key);
goto error;
}
continue;
}
// Scenario #5: Unknown constructor argument
*pzErr =
sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR "Could not parse '%s'", argv[i]);
goto error;
2024-07-23 23:57:42 -07:00
}
if (chunk_size < 0) {
chunk_size = 1024;
2024-07-23 23:57:42 -07:00
}
if (numVectorColumns <= 0) {
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"At least one vector column is required");
goto error;
2024-07-23 23:57:42 -07:00
}
#if SQLITE_VEC_ENABLE_RESCORE
{
int hasRescore = 0;
for (int i = 0; i < numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_RESCORE) {
hasRescore = 1;
break;
}
}
if (hasRescore) {
if (numMetadataColumns > 0) {
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"Metadata columns are not supported with rescore indexes");
goto error;
}
if (numPartitionColumns > 0) {
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"Partition key columns are not supported with rescore indexes");
goto error;
}
}
}
#endif
// IVF indexes do not support metadata or partition key columns (auxiliary columns are not rejected by this check).
{
int has_ivf = 0;
for (int i = 0; i < numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_IVF) {
has_ivf = 1;
break;
}
}
if (has_ivf) {
if (numPartitionColumns > 0) {
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"partition key columns are not supported with IVF indexes");
goto error;
}
if (numMetadataColumns > 0) {
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"metadata columns are not supported with IVF indexes");
goto error;
}
}
}
// DiskANN-indexed columns cannot coexist with metadata or partition key columns (auxiliary columns are not rejected by this check)
for (int i = 0; i < numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_DISKANN) {
if (numMetadataColumns > 0) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"Metadata columns are not supported with DiskANN-indexed vector columns");
goto error;
}
if (numPartitionColumns > 0) {
*pzErr = sqlite3_mprintf(
VEC_CONSTRUCTOR_ERROR
"Partition key columns are not supported with DiskANN-indexed vector columns");
goto error;
}
break;
}
}
sqlite3_str *createStr = sqlite3_str_new(NULL);
sqlite3_str_appendall(createStr, "CREATE TABLE x(");
if (pkColumnName) {
sqlite3_str_appendf(createStr, "\"%.*w\" primary key, ", pkColumnNameLength,
pkColumnName);
} else {
sqlite3_str_appendall(createStr, "rowid, ");
2024-07-23 23:57:42 -07:00
}
for (int i = 0; i < numVectorColumns + numPartitionColumns + numAuxiliaryColumns + numMetadataColumns; i++) {
switch(pNew->user_column_kinds[i]) {
case SQLITE_VEC0_USER_COLUMN_KIND_VECTOR: {
int vector_idx = pNew->user_column_idxs[i];
sqlite3_str_appendf(createStr, "\"%.*w\", ",
pNew->vector_columns[vector_idx].name_length,
pNew->vector_columns[vector_idx].name);
break;
}
case SQLITE_VEC0_USER_COLUMN_KIND_PARTITION: {
int partition_idx = pNew->user_column_idxs[i];
sqlite3_str_appendf(createStr, "\"%.*w\", ",
pNew->paritition_columns[partition_idx].name_length,
pNew->paritition_columns[partition_idx].name);
break;
}
case SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY: {
int auxiliary_idx = pNew->user_column_idxs[i];
sqlite3_str_appendf(createStr, "\"%.*w\", ",
pNew->auxiliary_columns[auxiliary_idx].name_length,
pNew->auxiliary_columns[auxiliary_idx].name);
break;
}
case SQLITE_VEC0_USER_COLUMN_KIND_METADATA: {
int metadata_idx = pNew->user_column_idxs[i];
sqlite3_str_appendf(createStr, "\"%.*w\", ",
pNew->metadata_columns[metadata_idx].name_length,
pNew->metadata_columns[metadata_idx].name);
break;
}
}
}
sqlite3_str_appendall(createStr, " distance hidden, k hidden) ");
if (pkColumnName) {
sqlite3_str_appendall(createStr, "without rowid ");
}
zSql = sqlite3_str_finish(createStr);
if (!zSql) {
goto error;
}
rc = sqlite3_declare_vtab(db, zSql);
sqlite3_free((void *)zSql);
if (rc != SQLITE_OK) {
*pzErr = sqlite3_mprintf(VEC_CONSTRUCTOR_ERROR
"could not declare virtual table, '%s'",
sqlite3_errmsg(db));
goto error;
}
const char *schemaName = argv[1];
const char *tableName = argv[2];
pNew->db = db;
pNew->pkIsText = pkColumnType == SQLITE_TEXT;
pNew->schemaName = sqlite3_mprintf("%s", schemaName);
if (!pNew->schemaName) {
goto error;
}
pNew->tableName = sqlite3_mprintf("%s", tableName);
if (!pNew->tableName) {
goto error;
2024-07-23 23:57:42 -07:00
}
pNew->shadowRowidsName = sqlite3_mprintf("%s_rowids", tableName);
if (!pNew->shadowRowidsName) {
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
pNew->shadowChunksName = sqlite3_mprintf("%s_chunks", tableName);
if (!pNew->shadowChunksName) {
goto error;
}
pNew->numVectorColumns = numVectorColumns;
pNew->numPartitionColumns = numPartitionColumns;
pNew->numAuxiliaryColumns = numAuxiliaryColumns;
pNew->numMetadataColumns = numMetadataColumns;
for (int i = 0; i < pNew->numVectorColumns; i++) {
pNew->shadowVectorChunksNames[i] =
sqlite3_mprintf("%s_vector_chunks%02d", tableName, i);
if (!pNew->shadowVectorChunksNames[i]) {
goto error;
}
#if SQLITE_VEC_ENABLE_RESCORE
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_RESCORE) {
pNew->shadowRescoreChunksNames[i] =
sqlite3_mprintf("%s_rescore_chunks%02d", tableName, i);
if (!pNew->shadowRescoreChunksNames[i]) {
goto error;
}
pNew->shadowRescoreVectorsNames[i] =
sqlite3_mprintf("%s_rescore_vectors%02d", tableName, i);
if (!pNew->shadowRescoreVectorsNames[i]) {
goto error;
}
}
#endif
#if SQLITE_VEC_ENABLE_DISKANN
if (pNew->vector_columns[i].index_type == VEC0_INDEX_TYPE_DISKANN) {
pNew->shadowVectorsNames[i] =
sqlite3_mprintf("%s_vectors%02d", tableName, i);
if (!pNew->shadowVectorsNames[i]) {
goto error;
}
pNew->shadowDiskannNodesNames[i] =
sqlite3_mprintf("%s_diskann_nodes%02d", tableName, i);
if (!pNew->shadowDiskannNodesNames[i]) {
goto error;
}
}
#endif
}
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
for (int i = 0; i < pNew->numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type != VEC0_INDEX_TYPE_IVF) continue;
pNew->shadowIvfCellsNames[i] =
sqlite3_mprintf("%s_ivf_cells%02d", tableName, i);
if (!pNew->shadowIvfCellsNames[i]) goto error;
pNew->ivfTrainedCache[i] = -1; // unknown
}
#endif
for (int i = 0; i < pNew->numMetadataColumns; i++) {
pNew->shadowMetadataChunksNames[i] =
sqlite3_mprintf("%s_metadatachunks%02d", tableName, i);
if (!pNew->shadowMetadataChunksNames[i]) {
goto error;
}
}
pNew->chunk_size = chunk_size;
// if xCreate, then create the necessary shadow tables
if (isCreate) {
sqlite3_stmt *stmt;
int rc;
2024-04-20 13:38:58 -07:00
char * zCreateInfo = sqlite3_mprintf("CREATE TABLE "VEC0_SHADOW_INFO_NAME " (key text primary key, value any)", pNew->schemaName, pNew->tableName);
if(!zCreateInfo) {
goto error;
}
rc = sqlite3_prepare_v2(db, zCreateInfo, -1, &stmt, NULL);
sqlite3_free((void *) zCreateInfo);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
// TODO(IMP)
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf("Could not create '_info' shadow table: %s",
sqlite3_errmsg(db));
goto error;
}
2024-06-24 23:26:11 -07:00
sqlite3_finalize(stmt);
2024-04-20 13:38:58 -07:00
char * zSeedInfo = sqlite3_mprintf(
"INSERT INTO "VEC0_SHADOW_INFO_NAME "(key, value) VALUES "
"(?1, ?2), (?3, ?4), (?5, ?6), (?7, ?8) ",
pNew->schemaName, pNew->tableName
);
if(!zSeedInfo) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSeedInfo, -1, &stmt, NULL);
sqlite3_free((void *) zSeedInfo);
if (rc != SQLITE_OK) {
// TODO(IMP)
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s",
sqlite3_errmsg(db));
goto error;
2024-06-24 23:26:11 -07:00
}
sqlite3_bind_text(stmt, 1, "CREATE_VERSION", -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 2, SQLITE_VEC_VERSION, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 3, "CREATE_VERSION_MAJOR", -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 4, SQLITE_VEC_VERSION_MAJOR);
sqlite3_bind_text(stmt, 5, "CREATE_VERSION_MINOR", -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 6, SQLITE_VEC_VERSION_MINOR);
sqlite3_bind_text(stmt, 7, "CREATE_VERSION_PATCH", -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 8, SQLITE_VEC_VERSION_PATCH);
2024-04-20 13:38:58 -07:00
if(sqlite3_step(stmt) != SQLITE_DONE) {
// TODO(IMP)
2024-06-24 23:26:11 -07:00
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf("Could not seed '_info' shadow table: %s",
sqlite3_errmsg(db));
goto error;
2024-06-24 23:26:11 -07:00
}
sqlite3_finalize(stmt);
2024-06-24 23:26:11 -07:00
#if SQLITE_VEC_ENABLE_DISKANN
// Seed medoid entries for DiskANN-indexed columns
for (int i = 0; i < pNew->numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type != VEC0_INDEX_TYPE_DISKANN) {
continue;
}
char *key = sqlite3_mprintf("diskann_medoid_%02d", i);
char *zInsert = sqlite3_mprintf(
"INSERT INTO " VEC0_SHADOW_INFO_NAME "(key, value) VALUES (?1, ?2)",
pNew->schemaName, pNew->tableName);
rc = sqlite3_prepare_v2(db, zInsert, -1, &stmt, NULL);
sqlite3_free(zInsert);
if (rc != SQLITE_OK) {
sqlite3_free(key);
sqlite3_finalize(stmt);
goto error;
}
sqlite3_bind_text(stmt, 1, key, -1, sqlite3_free);
sqlite3_bind_null(stmt, 2); // NULL means empty graph
if (sqlite3_step(stmt) != SQLITE_DONE) {
sqlite3_finalize(stmt);
goto error;
}
sqlite3_finalize(stmt);
}
#endif
// create the _chunks shadow table
char *zCreateShadowChunks = NULL;
if(pNew->numPartitionColumns) {
sqlite3_str * s = sqlite3_str_new(NULL);
sqlite3_str_appendf(s, "CREATE TABLE " VEC0_SHADOW_CHUNKS_NAME "(", pNew->schemaName, pNew->tableName);
sqlite3_str_appendall(s, "chunk_id INTEGER PRIMARY KEY AUTOINCREMENT," "size INTEGER NOT NULL,");
sqlite3_str_appendall(s, "sequence_id integer,");
for(int i = 0; i < pNew->numPartitionColumns;i++) {
sqlite3_str_appendf(s, "partition%02d,", i);
}
sqlite3_str_appendall(s, "validity BLOB NOT NULL, rowids BLOB NOT NULL);");
zCreateShadowChunks = sqlite3_str_finish(s);
}else {
zCreateShadowChunks = sqlite3_mprintf(VEC0_SHADOW_CHUNKS_CREATE,
pNew->schemaName, pNew->tableName);
2024-06-24 23:26:11 -07:00
}
if (!zCreateShadowChunks) {
goto error;
}
rc = sqlite3_prepare_v2(db, zCreateShadowChunks, -1, &stmt, 0);
sqlite3_free((void *)zCreateShadowChunks);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
// IMP: V17740_01811
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf("Could not create '_chunks' shadow table: %s",
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
2024-04-20 13:38:58 -07:00
// create the _rowids shadow table
char *zCreateShadowRowids;
if (pNew->pkIsText) {
// adds a "text unique not null" constraint to the id column
zCreateShadowRowids = sqlite3_mprintf(VEC0_SHADOW_ROWIDS_CREATE_PK_TEXT,
pNew->schemaName, pNew->tableName);
} else {
zCreateShadowRowids = sqlite3_mprintf(VEC0_SHADOW_ROWIDS_CREATE_BASIC,
pNew->schemaName, pNew->tableName);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
if (!zCreateShadowRowids) {
goto error;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
rc = sqlite3_prepare_v2(db, zCreateShadowRowids, -1, &stmt, 0);
sqlite3_free((void *)zCreateShadowRowids);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
// IMP: V11631_28470
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf("Could not create '_rowids' shadow table: %s",
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
for (int i = 0; i < pNew->numVectorColumns; i++) {
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
if (pNew->vector_columns[i].index_type != VEC0_INDEX_TYPE_FLAT)
continue;
char *zSql = sqlite3_mprintf(VEC0_SHADOW_VECTOR_N_CREATE,
pNew->schemaName, pNew->tableName, i);
if (!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
// IMP: V25919_09989
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create '_vector_chunks%02d' shadow table: %s", i,
sqlite3_errmsg(db));
goto error;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
sqlite3_finalize(stmt);
}
#if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_create_tables(pNew, db, pzErr);
if (rc != SQLITE_OK) {
goto error;
}
#endif
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// Create IVF shadow tables for IVF-indexed vector columns
for (int i = 0; i < pNew->numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type != VEC0_INDEX_TYPE_IVF) continue;
rc = ivf_create_shadow_tables(pNew, i);
if (rc != SQLITE_OK) {
*pzErr = sqlite3_mprintf("Could not create IVF shadow tables for column %d", i);
goto error;
}
}
#endif
#if SQLITE_VEC_ENABLE_DISKANN
// Create DiskANN shadow tables for indexed vector columns
for (int i = 0; i < pNew->numVectorColumns; i++) {
if (pNew->vector_columns[i].index_type != VEC0_INDEX_TYPE_DISKANN) {
continue;
}
// Create _vectors{NN} table
{
char *zSql = sqlite3_mprintf(
"CREATE TABLE " VEC0_SHADOW_VECTORS_N_NAME
" (rowid INTEGER PRIMARY KEY, vector BLOB NOT NULL);",
pNew->schemaName, pNew->tableName, i);
if (!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
sqlite3_free(zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create '_vectors%02d' shadow table: %s", i,
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
}
// Create _diskann_nodes{NN} table
{
char *zSql = sqlite3_mprintf(
"CREATE TABLE " VEC0_SHADOW_DISKANN_NODES_N_NAME " ("
"rowid INTEGER PRIMARY KEY, "
"neighbors_validity BLOB NOT NULL, "
"neighbor_ids BLOB NOT NULL, "
"neighbor_quantized_vectors BLOB NOT NULL"
");",
pNew->schemaName, pNew->tableName, i);
if (!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
sqlite3_free(zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create '_diskann_nodes%02d' shadow table: %s", i,
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
}
// Create _diskann_buffer{NN} table (for batched inserts)
{
char *zSql = sqlite3_mprintf(
"CREATE TABLE " VEC0_SHADOW_DISKANN_BUFFER_N_NAME " ("
"rowid INTEGER PRIMARY KEY, "
"vector BLOB NOT NULL"
");",
pNew->schemaName, pNew->tableName, i);
if (!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
sqlite3_free(zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create '_diskann_buffer%02d' shadow table: %s", i,
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
}
}
#endif
// See SHADOW_TABLE_ROWID_QUIRK in vec0_new_chunk() — same "rowid PRIMARY KEY"
// without INTEGER type issue applies here.
for (int i = 0; i < pNew->numMetadataColumns; i++) {
char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid PRIMARY KEY, data BLOB NOT NULL);",
pNew->schemaName, pNew->tableName, i);
if (!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create '_metata_chunks%02d' shadow table: %s", i,
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(pNew->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME "(rowid PRIMARY KEY, data TEXT);",
pNew->schemaName, pNew->tableName, i);
if (!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create '_metadatatext%02d' shadow table: %s", i,
sqlite3_errmsg(db));
goto error;
}
sqlite3_finalize(stmt);
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
if(pNew->numAuxiliaryColumns > 0) {
sqlite3_stmt * stmt;
sqlite3_str * s = sqlite3_str_new(NULL);
sqlite3_str_appendf(s, "CREATE TABLE " VEC0_SHADOW_AUXILIARY_NAME "( rowid integer PRIMARY KEY ", pNew->schemaName, pNew->tableName);
for(int i = 0; i < pNew->numAuxiliaryColumns; i++) {
sqlite3_str_appendf(s, ", value%02d", i);
}
sqlite3_str_appendall(s, ")");
char *zSql = sqlite3_str_finish(s);
if(!zSql) {
goto error;
}
rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, NULL);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
sqlite3_finalize(stmt);
*pzErr = sqlite3_mprintf(
"Could not create auxiliary shadow table: %s",
sqlite3_errmsg(db));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
goto error;
}
sqlite3_finalize(stmt);
}
2024-04-20 13:38:58 -07:00
}
*ppVtab = (sqlite3_vtab *)pNew;
2024-04-20 13:38:58 -07:00
return SQLITE_OK;
error:
vec0_free(pNew);
sqlite3_free(pNew);
return SQLITE_ERROR;
2024-04-20 13:38:58 -07:00
}
/**
 * Create entry point for vec0 virtual tables.
 *
 * Forwards every argument unchanged to vec0_init() with the trailing flag set
 * to true and returns whatever vec0_init() returns.
 */
static int vec0Create(sqlite3 *db, void *pAux, int argc,
                      const char *const *argv, sqlite3_vtab **ppVtab,
                      char **pzErr) {
  const int rc = vec0_init(db, pAux, argc, argv, ppVtab, pzErr, true);
  return rc;
}
/**
 * Connect entry point for vec0 virtual tables.
 *
 * Forwards every argument unchanged to vec0_init() with the trailing flag set
 * to false and returns whatever vec0_init() returns.
 */
static int vec0Connect(sqlite3 *db, void *pAux, int argc,
                       const char *const *argv, sqlite3_vtab **ppVtab,
                       char **pzErr) {
  const int rc = vec0_init(db, pAux, argc, argv, ppVtab, pzErr, false);
  return rc;
}
2024-06-28 15:29:13 -07:00
/**
 * Disconnect callback for a vec0 virtual table.
 *
 * Releases everything owned by the vtab via vec0_free() and then frees the
 * vtab allocation itself. Always returns SQLITE_OK.
 */
static int vec0Disconnect(sqlite3_vtab *pVtab) {
  vec0_vtab *p = (vec0_vtab *)pVtab;
  vec0_free(p);
  sqlite3_free(p);
  return SQLITE_OK;
}
static int vec0Destroy(sqlite3_vtab *pVtab) {
vec0_vtab *p = (vec0_vtab *)pVtab;
sqlite3_stmt *stmt;
int rc;
const char *zSql;
2024-04-20 13:38:58 -07:00
// Free up any sqlite3_stmt, otherwise DROPs on those tables will fail
vec0_free_resources(p);
2024-06-28 15:29:13 -07:00
// TODO(test) later: can't evidence-of here, bc always gives "SQL logic error" instead of
// provided error
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_CHUNKS_NAME, p->schemaName,
p->tableName);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
vtab_set_error(pVtab, "could not drop chunks shadow table");
goto done;
2024-04-20 13:38:58 -07:00
}
sqlite3_finalize(stmt);
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_INFO_NAME, p->schemaName,
p->tableName);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
vtab_set_error(pVtab, "could not drop info shadow table");
goto done;
2024-04-20 13:38:58 -07:00
}
sqlite3_finalize(stmt);
2024-04-20 13:38:58 -07:00
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_ROWIDS_NAME, p->schemaName,
p->tableName);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
2024-04-20 13:38:58 -07:00
}
sqlite3_finalize(stmt);
2024-04-20 13:38:58 -07:00
for (int i = 0; i < p->numVectorColumns; i++) {
#if SQLITE_VEC_ENABLE_DISKANN
if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_DISKANN) {
// Drop DiskANN shadow tables
zSql = sqlite3_mprintf("DROP TABLE IF EXISTS " VEC0_SHADOW_VECTORS_N_NAME,
p->schemaName, p->tableName, i);
if (zSql) {
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
}
zSql = sqlite3_mprintf("DROP TABLE IF EXISTS " VEC0_SHADOW_DISKANN_NODES_N_NAME,
p->schemaName, p->tableName, i);
if (zSql) {
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
}
zSql = sqlite3_mprintf("DROP TABLE IF EXISTS " VEC0_SHADOW_DISKANN_BUFFER_N_NAME,
p->schemaName, p->tableName, i);
if (zSql) {
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
}
continue;
}
#endif
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_FLAT)
continue;
zSql = sqlite3_mprintf("DROP TABLE \"%w\".\"%w\"", p->schemaName,
p->shadowVectorChunksNames[i]);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
}
#if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_drop_tables(p);
if (rc != SQLITE_OK) {
goto done;
}
#endif
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// Drop IVF shadow tables
for (int i = 0; i < p->numVectorColumns; i++) {
if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_IVF) continue;
ivf_drop_shadow_tables(p, i);
}
#endif
if(p->numAuxiliaryColumns > 0) {
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_AUXILIARY_NAME, p->schemaName, p->tableName);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
}
2024-04-20 13:38:58 -07:00
for (int i = 0; i < p->numMetadataColumns; i++) {
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_N_NAME, p->schemaName,p->tableName, i);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
2024-04-20 13:38:58 -07:00
if(p->metadata_columns[i].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
zSql = sqlite3_mprintf("DROP TABLE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME, p->schemaName,p->tableName, i);
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0);
sqlite3_free((void *)zSql);
if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) {
rc = SQLITE_ERROR;
goto done;
}
sqlite3_finalize(stmt);
}
2024-10-11 09:09:32 -07:00
}
stmt = NULL;
rc = SQLITE_OK;
done:
sqlite3_finalize(stmt);
vec0_free(p);
// If there was an error
if (rc == SQLITE_OK) {
sqlite3_free(p);
2024-10-11 09:09:32 -07:00
}
return rc;
2024-10-11 09:09:32 -07:00
}
/**
 * Open callback for vec0: allocates a zero-initialized vec0_cursor and hands
 * its embedded base cursor back through ppCursor.
 *
 * The vtab argument is not used. Returns SQLITE_NOMEM when the allocation
 * fails (ppCursor is left untouched in that case), SQLITE_OK otherwise.
 */
static int vec0Open(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
  UNUSED_PARAMETER(p);
  vec0_cursor *pCur = sqlite3_malloc(sizeof(*pCur));
  if (pCur == 0) {
    return SQLITE_NOMEM;
  }
  memset(pCur, 0, sizeof(*pCur));
  *ppCursor = &pCur->base;
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
/**
 * Close callback for vec0: clears the cursor's state with vec0_cursor_clear()
 * and then frees the cursor allocation. Always returns SQLITE_OK.
 */
static int vec0Close(sqlite3_vtab_cursor *cur) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  vec0_cursor_clear(cursor);
  sqlite3_free(cursor);

  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
// All the different types of "values" provided to argv/argc in vec0Filter.
// Each enumerator is a single character that vec0BestIndex appends to idxStr
// to denote the use and purpose of the corresponding argv[i].
typedef enum {
  // If any values are updated, please update the ARCHITECTURE.md docs
  // accordingly!

  // ~~~ KNN QUERIES ~~~ //

  // argv[i] is the MATCH operand of the KNN query
  VEC0_IDXSTR_KIND_KNN_MATCH = '{',
  // argv[i] is the LIMIT or 'k = ?' value of the KNN query
  VEC0_IDXSTR_KIND_KNN_K = '}',
  // argv[i] is a 'rowid in (...)' constraint in a KNN query
  VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
  // argv[i] is a constraint on a PARTITION KEY column in a KNN query
  VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
  // argv[i] is a constraint on the distance column in a KNN query
  VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT = '*',

  // ~~~ POINT QUERIES ~~~ //
  VEC0_IDXSTR_KIND_POINT_ID = '!',

  // ~~~ METADATA ~~~ //
  // NOTE(review): presumably argv[i] is a constraint on a metadata column;
  // confirm which query plans emit it.
  VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
} vec0_idxstr_kind;
2024-04-20 13:38:58 -07:00
// The different SQLITE_INDEX_CONSTRAINT values that vec0 partition key columns
// support, but as characters that fit nicely in idxstr.
typedef enum {
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
// Equality constraint on a PARTITION KEY column, ex `user_id = 123`
VEC0_PARTITION_OPERATOR_EQ = 'a',
// "Greater than" constraint on a PARTITION KEY column, ex `year > 2024`
VEC0_PARTITION_OPERATOR_GT = 'b',
// "Less than or equal to" constraint on a PARTITION KEY column, ex `year <= 2024`
VEC0_PARTITION_OPERATOR_LE = 'c',
// "Less than" constraint on a PARTITION KEY column, ex `year < 2024`
VEC0_PARTITION_OPERATOR_LT = 'd',
// "Greater than or equal to" constraint on a PARTITION KEY column, ex `year >= 2024`
VEC0_PARTITION_OPERATOR_GE = 'e',
// "Not equal to" constraint on a PARTITION KEY column, ex `year != 2024`
VEC0_PARTITION_OPERATOR_NE = 'f',
} vec0_partition_operator;
// Comparison operators for constraints on metadata columns, encoded as single
// characters. The EQ..NE values use the same letters as the matching
// vec0_partition_operator entries; IN has no partition-key counterpart.
// NOTE(review): presumably these characters are written into idxstr like the
// partition operators - confirm against vec0BestIndex/vec0Filter.
typedef enum {
// "Equal to"
VEC0_METADATA_OPERATOR_EQ = 'a',
// "Greater than"
VEC0_METADATA_OPERATOR_GT = 'b',
// "Less than or equal to"
VEC0_METADATA_OPERATOR_LE = 'c',
// "Less than"
VEC0_METADATA_OPERATOR_LT = 'd',
// "Greater than or equal to"
VEC0_METADATA_OPERATOR_GE = 'e',
// "Not equal to"
VEC0_METADATA_OPERATOR_NE = 'f',
// Membership test, i.e. `IN (...)`
VEC0_METADATA_OPERATOR_IN = 'g',
} vec0_metadata_operator;
2024-06-24 23:26:11 -07:00
// Comparison operators for constraints on the distance column, encoded as
// single characters.
// Beware: the letter assignment here differs from vec0_partition_operator and
// vec0_metadata_operator (e.g. 'b' is GE here but GT there), so the values are
// not interchangeable between the enums.
typedef enum {
// "Greater than"
VEC0_DISTANCE_CONSTRAINT_GT = 'a',
// "Greater than or equal to"
VEC0_DISTANCE_CONSTRAINT_GE = 'b',
// "Less than"
VEC0_DISTANCE_CONSTRAINT_LT = 'c',
// "Less than or equal to"
VEC0_DISTANCE_CONSTRAINT_LE = 'd',
} vec0_distance_constraint_operator;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
vec0_vtab *p = (vec0_vtab *)pVTab;
/**
* Possible query plans are:
* 1. KNN when:
* a) An `MATCH` op on vector column
* b) ORDER BY on distance column
* c) LIMIT
* d) rowid in (...) OPTIONAL
* 2. Point when:
* a) An `EQ` op on rowid column
* 3. else: fullscan
*
*/
int iMatchTerm = -1;
int iMatchVectorTerm = -1;
int iLimitTerm = -1;
int iRowidTerm = -1;
int iKTerm = -1;
int iRowidInTerm = -1;
int hasAuxConstraint = 0;
#ifdef SQLITE_VEC_DEBUG
printf("pIdxInfo->nOrderBy=%d, pIdxInfo->nConstraint=%d\n", pIdxInfo->nOrderBy, pIdxInfo->nConstraint);
#endif
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
u8 vtabIn = 0;
#if COMPILER_SUPPORTS_VTAB_IN
if (sqlite3_libversion_number() >= 3038000) {
vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
#endif
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
#ifdef SQLITE_VEC_DEBUG
printf("xBestIndex [%d] usable=%d iColumn=%d op=%d vtabin=%d\n", i,
pIdxInfo->aConstraint[i].usable, pIdxInfo->aConstraint[i].iColumn,
pIdxInfo->aConstraint[i].op, vtabIn);
#endif
if (!pIdxInfo->aConstraint[i].usable)
continue;
int iColumn = pIdxInfo->aConstraint[i].iColumn;
int op = pIdxInfo->aConstraint[i].op;
if (op == SQLITE_INDEX_CONSTRAINT_LIMIT) {
iLimitTerm = i;
2024-06-24 23:26:11 -07:00
}
if (op == SQLITE_INDEX_CONSTRAINT_MATCH &&
vec0_column_idx_is_vector(p, iColumn)) {
if (iMatchTerm > -1) {
vtab_set_error(
pVTab, "only 1 MATCH operator is allowed in a single vec0 query");
return SQLITE_ERROR;
}
iMatchTerm = i;
iMatchVectorTerm = vec0_column_idx_to_vector_idx(p, iColumn);
}
if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == VEC0_COLUMN_ID) {
if (vtabIn) {
if (iRowidInTerm != -1) {
vtab_set_error(pVTab, "only 1 'rowid in (..)' operator is allowed in "
"a single vec0 query");
return SQLITE_ERROR;
2024-08-01 02:45:51 -07:00
}
iRowidInTerm = i;
2024-04-20 13:38:58 -07:00
} else {
iRowidTerm = i;
2024-04-20 13:38:58 -07:00
}
}
if (op == SQLITE_INDEX_CONSTRAINT_EQ && iColumn == vec0_column_k_idx(p)) {
iKTerm = i;
}
if(
(op != SQLITE_INDEX_CONSTRAINT_LIMIT && op != SQLITE_INDEX_CONSTRAINT_OFFSET)
&& vec0_column_idx_is_auxiliary(p, iColumn)) {
hasAuxConstraint = 1;
}
2024-04-20 13:38:58 -07:00
}
sqlite3_str *idxStr = sqlite3_str_new(NULL);
int rc;
2024-04-20 13:38:58 -07:00
if (iMatchTerm >= 0) {
if (iLimitTerm < 0 && iKTerm < 0) {
vtab_set_error(
pVTab,
"A LIMIT or 'k = ?' constraint is required on vec0 knn queries.");
rc = SQLITE_ERROR;
goto done;
}
if (iLimitTerm >= 0 && iKTerm >= 0) {
vtab_set_error(pVTab, "Only LIMIT or 'k =?' can be provided, not both");
rc = SQLITE_ERROR;
goto done;
}
2024-04-20 13:38:58 -07:00
if (pIdxInfo->nOrderBy) {
if (pIdxInfo->nOrderBy > 1) {
vtab_set_error(pVTab, "Only a single 'ORDER BY distance' clause is "
"allowed on vec0 KNN queries");
rc = SQLITE_ERROR;
goto done;
}
if (pIdxInfo->aOrderBy[0].iColumn != vec0_column_distance_idx(p)) {
vtab_set_error(pVTab,
"Only a single 'ORDER BY distance' clause is allowed on "
"vec0 KNN queries, not on other columns");
rc = SQLITE_ERROR;
goto done;
}
if (pIdxInfo->aOrderBy[0].desc) {
vtab_set_error(
pVTab, "Only ascending in ORDER BY distance clause is supported, "
"DESC is not supported yet.");
rc = SQLITE_ERROR;
goto done;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
}
if(hasAuxConstraint) {
// IMP: V25623_09693
vtab_set_error(pVTab, "An illegal WHERE constraint was provided on a vec0 auxiliary column in a KNN query.");
rc = SQLITE_ERROR;
goto done;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
2024-04-20 13:38:58 -07:00
sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_KNN);
2024-04-20 13:38:58 -07:00
int argvIndex = 1;
pIdxInfo->aConstraintUsage[iMatchTerm].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[iMatchTerm].omit = 1;
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_MATCH);
sqlite3_str_appendchar(idxStr, 3, '_');
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if (iLimitTerm >= 0) {
pIdxInfo->aConstraintUsage[iLimitTerm].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[iLimitTerm].omit = 1;
} else {
pIdxInfo->aConstraintUsage[iKTerm].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[iKTerm].omit = 1;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_K);
sqlite3_str_appendchar(idxStr, 3, '_');
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
#if COMPILER_SUPPORTS_VTAB_IN
if (iRowidInTerm >= 0) {
// already validated as >= SQLite 3.38 bc iRowidInTerm is only >= 0 when
// vtabIn == 1
sqlite3_vtab_in(pIdxInfo, iRowidInTerm, 1);
pIdxInfo->aConstraintUsage[iRowidInTerm].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[iRowidInTerm].omit = 1;
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_ROWID_IN);
sqlite3_str_appendchar(idxStr, 3, '_');
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
#endif
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// find any PARTITION KEY column constraints
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
if (!pIdxInfo->aConstraint[i].usable)
continue;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int iColumn = pIdxInfo->aConstraint[i].iColumn;
int op = pIdxInfo->aConstraint[i].op;
if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
continue;
}
if(!vec0_column_idx_is_partition(p, iColumn)) {
continue;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int partition_idx = vec0_column_idx_to_partition_idx(p, iColumn);
char value = 0;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
switch(op) {
case SQLITE_INDEX_CONSTRAINT_EQ: {
value = VEC0_PARTITION_OPERATOR_EQ;
break;
}
case SQLITE_INDEX_CONSTRAINT_GT: {
value = VEC0_PARTITION_OPERATOR_GT;
break;
}
case SQLITE_INDEX_CONSTRAINT_LE: {
value = VEC0_PARTITION_OPERATOR_LE;
break;
}
case SQLITE_INDEX_CONSTRAINT_LT: {
value = VEC0_PARTITION_OPERATOR_LT;
break;
}
case SQLITE_INDEX_CONSTRAINT_GE: {
value = VEC0_PARTITION_OPERATOR_GE;
break;
}
case SQLITE_INDEX_CONSTRAINT_NE: {
value = VEC0_PARTITION_OPERATOR_NE;
break;
}
}
if(value) {
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[i].omit = 1;
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT);
sqlite3_str_appendchar(idxStr, 1, 'A' + partition_idx);
sqlite3_str_appendchar(idxStr, 1, value);
sqlite3_str_appendchar(idxStr, 1, '_');
}
2024-04-20 13:38:58 -07:00
2024-06-24 23:26:11 -07:00
}
2024-04-20 13:38:58 -07:00
// find any metadata column constraints
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
if (!pIdxInfo->aConstraint[i].usable)
continue;
int iColumn = pIdxInfo->aConstraint[i].iColumn;
int op = pIdxInfo->aConstraint[i].op;
if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
continue;
2024-06-24 23:26:11 -07:00
}
if(!vec0_column_idx_is_metadata(p, iColumn)) {
continue;
2024-06-24 23:26:11 -07:00
}
int metadata_idx = vec0_column_idx_to_metadata_idx(p, iColumn);
char value = 0;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
switch(op) {
case SQLITE_INDEX_CONSTRAINT_EQ: {
int vtabIn = 0;
#if COMPILER_SUPPORTS_VTAB_IN
if (sqlite3_libversion_number() >= 3038000) {
vtabIn = sqlite3_vtab_in(pIdxInfo, i, -1);
}
if(vtabIn) {
switch(p->metadata_columns[metadata_idx].kind) {
case VEC0_METADATA_COLUMN_KIND_FLOAT:
case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
// IMP: V15248_32086
rc = SQLITE_ERROR;
vtab_set_error(pVTab, "'xxx in (...)' is only available on INTEGER or TEXT metadata columns.");
goto done;
break;
}
case VEC0_METADATA_COLUMN_KIND_INTEGER:
case VEC0_METADATA_COLUMN_KIND_TEXT: {
break;
}
}
value = VEC0_METADATA_OPERATOR_IN;
sqlite3_vtab_in(pIdxInfo, i, 1);
}else
#endif
{
value = VEC0_PARTITION_OPERATOR_EQ;
}
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case SQLITE_INDEX_CONSTRAINT_GT: {
value = VEC0_METADATA_OPERATOR_GT;
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case SQLITE_INDEX_CONSTRAINT_LE: {
value = VEC0_METADATA_OPERATOR_LE;
break;
}
case SQLITE_INDEX_CONSTRAINT_LT: {
value = VEC0_METADATA_OPERATOR_LT;
break;
}
case SQLITE_INDEX_CONSTRAINT_GE: {
value = VEC0_METADATA_OPERATOR_GE;
break;
}
case SQLITE_INDEX_CONSTRAINT_NE: {
value = VEC0_METADATA_OPERATOR_NE;
break;
}
default: {
// IMP: V16511_00582
rc = SQLITE_ERROR;
vtab_set_error(pVTab,
"An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. "
"Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed."
);
goto done;
}
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(p->metadata_columns[metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_BOOLEAN) {
if(!(value == VEC0_METADATA_OPERATOR_EQ || value == VEC0_METADATA_OPERATOR_NE)) {
// IMP: V10145_26984
rc = SQLITE_ERROR;
vtab_set_error(pVTab, "ONLY EQUALS (=) or NOT_EQUALS (!=) operators are allowed on boolean metadata columns.");
goto done;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[i].omit = 1;
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_METADATA_CONSTRAINT);
sqlite3_str_appendchar(idxStr, 1, 'A' + metadata_idx);
sqlite3_str_appendchar(idxStr, 1, value);
sqlite3_str_appendchar(idxStr, 1, '_');
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
// find any distance column constraints
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
if (!pIdxInfo->aConstraint[i].usable)
continue;
int iColumn = pIdxInfo->aConstraint[i].iColumn;
int op = pIdxInfo->aConstraint[i].op;
if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
continue;
}
if(vec0_column_distance_idx(p) != iColumn) {
continue;
}
char value = 0;
switch(op) {
case SQLITE_INDEX_CONSTRAINT_GT: {
value = VEC0_DISTANCE_CONSTRAINT_GT;
break;
}
case SQLITE_INDEX_CONSTRAINT_GE: {
value = VEC0_DISTANCE_CONSTRAINT_GE;
break;
}
case SQLITE_INDEX_CONSTRAINT_LT: {
value = VEC0_DISTANCE_CONSTRAINT_LT;
break;
}
case SQLITE_INDEX_CONSTRAINT_LE: {
value = VEC0_DISTANCE_CONSTRAINT_LE;
break;
}
default: {
// IMP TODO
rc = SQLITE_ERROR;
vtab_set_error(
pVTab,
"Illegal WHERE constraint on distance column in a KNN query. "
"Only one of GT, GE, LT, LE constraints are allowed."
);
goto done;
}
}
2024-04-20 13:38:58 -07:00
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
pIdxInfo->aConstraintUsage[i].omit = 1;
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT);
sqlite3_str_appendchar(idxStr, 1, value);
sqlite3_str_appendchar(idxStr, 1, '_');
sqlite3_str_appendchar(idxStr, 1, '_');
}
2024-04-20 13:38:58 -07:00
2024-06-25 08:52:48 -07:00
pIdxInfo->idxNum = iMatchVectorTerm;
pIdxInfo->estimatedCost = 30.0;
pIdxInfo->estimatedRows = 10;
2024-06-25 08:52:48 -07:00
} else if (iRowidTerm >= 0) {
sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_POINT);
pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_POINT_ID);
sqlite3_str_appendchar(idxStr, 3, '_');
pIdxInfo->idxNum = pIdxInfo->colUsed;
pIdxInfo->estimatedCost = 10.0;
pIdxInfo->estimatedRows = 1;
} else {
sqlite3_str_appendchar(idxStr, 1, VEC0_QUERY_PLAN_FULLSCAN);
pIdxInfo->estimatedCost = 3000000.0;
pIdxInfo->estimatedRows = 100000;
2024-06-25 08:52:48 -07:00
}
pIdxInfo->idxStr = sqlite3_str_finish(idxStr);
idxStr = NULL;
if (!pIdxInfo->idxStr) {
rc = SQLITE_OK;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
goto done;
}
pIdxInfo->needToFreeIdxStr = 1;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = SQLITE_OK;
2024-04-20 13:38:58 -07:00
done:
if(idxStr) {
sqlite3_str_finish(idxStr);
2024-06-25 08:52:48 -07:00
}
return rc;
}
// forward declaration because vec0Filter uses it
static int vec0Next(sqlite3_vtab_cursor *cur);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
/**
 * @brief Merge two distance lists into `out`, smallest first, keeping at most
 * `out_length` entries.
 *
 * `a` is read directly (a[0], a[1], ...). `b` is read indirectly through
 * `b_top_idxs`: the j-th element taken from b is b[b_top_idxs[j]], with its
 * rowid at b_rowids[b_top_idxs[j]]. Both sequences are assumed to already be
 * in ascending order -- this function only merges, it does not sort.
 *
 * When both heads compare equal, the entry from `a` is emitted first.
 *
 * @param a           distances of the first list
 * @param a_rowids    rowids parallel to `a`
 * @param a_length    number of entries in `a` / `a_rowids`
 * @param b           distances of the second list, indexed via `b_top_idxs`
 * @param b_rowids    rowids of the second list, indexed via `b_top_idxs`
 * @param b_top_idxs  `b_length` indexes into `b` / `b_rowids`
 * @param b_length    number of entries in `b_top_idxs`
 * @param out         output distances, room for `out_length` entries
 * @param out_rowids  output rowids, room for `out_length` entries
 * @param out_length  capacity of `out` / `out_rowids`
 * @param out_used    set to the number of entries actually written
 */
void merge_sorted_lists(f32 *a, i64 *a_rowids, i64 a_length, f32 *b,
                        i64 *b_rowids, i32 *b_top_idxs, i64 b_length, f32 *out,
                        i64 *out_rowids, i64 out_length, i64 *out_used) {
  i64 ptrA = 0;
  i64 ptrB = 0;
  // i64 index: the bound and *out_used are both i64
  for (i64 i = 0; i < out_length; i++) {
    int hasA = ptrA < a_length;
    int hasB = ptrB < b_length;
    if (!hasA && !hasB) {
      // both inputs exhausted before the output filled up
      *out_used = i;
      return;
    }
    // take from a when b is exhausted, or when a's head is <= b's head
    if (hasA && (!hasB || a[ptrA] <= b[b_top_idxs[ptrB]])) {
      out[i] = a[ptrA];
      out_rowids[i] = a_rowids[ptrA];
      ptrA++;
    } else {
      i32 bIdx = b_top_idxs[ptrB];
      out[i] = b[bIdx];
      out_rowids[i] = b_rowids[bIdx];
      ptrB++;
    }
  }
  *out_used = out_length;
}
2024-07-05 12:07:45 -07:00
/**
 * @brief Allocate a zero-filled bitmap with room for n bits.
 *
 * @param n number of bits; must be a multiple of 8 (asserted).
 * @return buffer of n / CHAR_BIT bytes obtained from sqlite3_malloc(), or NULL
 *         if the allocation fails. Release with sqlite3_free().
 */
u8 *bitmap_new(i32 n) {
  assert(n % 8 == 0);
  // computed once, in signed arithmetic, so a bogus negative n stays negative
  // instead of wrapping to a huge size_t
  i32 nBytes = n / CHAR_BIT;
  u8 *p = sqlite3_malloc(nBytes);
  if (p) {
    memset(p, 0, nBytes);
  }
  return p;
}
/**
 * @brief Allocate a new n-bit bitmap initialized as a copy of `from`.
 *
 * @param n    number of bits; must be a multiple of 8 (asserted).
 * @param from source bitmap; n / CHAR_BIT bytes are read from it.
 * @return buffer of n / CHAR_BIT bytes obtained from sqlite3_malloc(), or NULL
 *         if the allocation fails. Release with sqlite3_free().
 */
u8 *bitmap_new_from(i32 n, u8 *from) {
  assert(n % 8 == 0);
  // one byte count shared by the allocation and the copy
  i32 nBytes = n / CHAR_BIT;
  u8 *p = sqlite3_malloc(nBytes);
  if (p) {
    memcpy(p, from, nBytes);
  }
  return p;
}
/**
 * @brief Overwrite the n-bit bitmap `base` with the contents of `from`.
 *
 * @param base destination bitmap, at least n / CHAR_BIT bytes
 * @param from source bitmap, at least n / CHAR_BIT bytes; copied with
 *             memcpy(), so it must not overlap `base`
 * @param n    number of bits; must be a multiple of 8 (asserted).
 */
void bitmap_copy(u8 *base, u8 *from, i32 n) {
  assert(n % 8 == 0);
  memcpy(base, from, n / CHAR_BIT);
}
// Bitwise-AND the n-bit bitmap `other` into `base`, byte by byte.
// n must be a multiple of 8 (asserted).
void bitmap_and_inplace(u8 *base, u8 *other, i32 n) {
  assert((n % 8) == 0);
  int nBytes = n / CHAR_BIT;
  for (int byteIdx = 0; byteIdx < nBytes; byteIdx++) {
    base[byteIdx] &= other[byteIdx];
  }
}
// Set (value != 0) or clear (value == 0) the bit at `position`.
// Bit 0 of each byte is the least-significant bit.
void bitmap_set(u8 *bitmap, i32 position, int value) {
  u8 *slot = &bitmap[position / CHAR_BIT];
  int mask = 1 << (position % CHAR_BIT);
  if (!value) {
    *slot &= ~mask;
    return;
  }
  *slot |= mask;
}
// Return 1 if the bit at `position` is set, 0 otherwise.
int bitmap_get(u8 *bitmap, i32 position) {
  u8 byte = bitmap[position / CHAR_BIT];
  int shift = position % CHAR_BIT;
  return (byte >> shift) & 1;
}
// Reset every bit of the n-bit bitmap to 0. n must be a multiple of 8
// (asserted).
void bitmap_clear(u8 *bitmap, i32 n) {
  assert((n % 8) == 0);
  int nBytes = n / CHAR_BIT;
  memset(bitmap, 0, nBytes);
}
// Set every bit of the n-bit bitmap to 1. n must be a multiple of 8
// (asserted).
void bitmap_fill(u8 *bitmap, i32 n) {
  assert((n % 8) == 0);
  int nBytes = n / CHAR_BIT;
  memset(bitmap, 0xFF, nBytes);
}
/**
 * @brief Finds the minimum k items in distances, and writes their indices to
 * out in ascending order of distance.
 *
 * Only indices whose bit is set in `candidates` are considered. If fewer than
 * k candidates exist, only that many entries are written.
 *
 * Two implementations are selected at compile time:
 *  - SQLITE_VEC_EXPERIMENTAL_MIN_IDX defined: single pass over the input with
 *    a max-heap of size k kept in `out`, followed by a heapsort. `bTaken` is
 *    not used.
 *  - otherwise: k linear scans over the input, using `bTaken` to remember the
 *    indices already emitted.
 *
 * @param distances  input f32 array of size n, the items to consider.
 * @param n          size of distances array. The default implementation clears
 *                   `bTaken` with bitmap_clear(), which asserts n % 8 == 0.
 * @param candidates bitmap of n bits; index i is eligible only if bit i is set.
 * @param out        output array of size k, will contain at most k element
 *                   indices.
 * @param k          size of output array; must satisfy 0 < k <= n (asserted).
 * @param bTaken     scratch bitmap of n bits, overwritten by the default
 *                   implementation.
 * @param k_used     set to the number of entries written to out.
 * @return int       always SQLITE_OK.
 */
int min_idx(const f32 *distances, i32 n, u8 *candidates, i32 *out, i32 k,
            u8 *bTaken, i32 *k_used) {
  assert(k > 0);
  assert(k <= n);

#ifdef SQLITE_VEC_EXPERIMENTAL_MIN_IDX
  // Max-heap variant: O(n log k) single-pass.
  // out[0..heap_size-1] stores indices; heap ordered by distances descending
  // so out[0] is always the index of the LARGEST distance in the top-k.
  (void)bTaken;
  int heap_size = 0;

  #define HEAP_SIFT_UP(pos) do { \
    int _c = (pos); \
    while (_c > 0) { \
      int _p = (_c - 1) / 2; \
      if (distances[out[_p]] < distances[out[_c]]) { \
        i32 _tmp = out[_p]; out[_p] = out[_c]; out[_c] = _tmp; \
        _c = _p; \
      } else break; \
    } \
  } while(0)

  #define HEAP_SIFT_DOWN(pos, sz) do { \
    int _p = (pos); \
    for (;;) { \
      int _l = 2*_p + 1, _r = 2*_p + 2, _largest = _p; \
      if (_l < (sz) && distances[out[_l]] > distances[out[_largest]]) \
        _largest = _l; \
      if (_r < (sz) && distances[out[_r]] > distances[out[_largest]]) \
        _largest = _r; \
      if (_largest == _p) break; \
      i32 _tmp = out[_p]; out[_p] = out[_largest]; out[_largest] = _tmp; \
      _p = _largest; \
    } \
  } while(0)

  for (int i = 0; i < n; i++) {
    if (!bitmap_get(candidates, i))
      continue;
    if (heap_size < k) {
      // heap not full yet: append and restore the heap property
      out[heap_size] = i;
      heap_size++;
      HEAP_SIFT_UP(heap_size - 1);
    } else if (distances[i] < distances[out[0]]) {
      // strictly better than the current worst of the top-k: replace the root
      out[0] = i;
      HEAP_SIFT_DOWN(0, heap_size);
    }
  }

  // Heapsort to produce ascending order.
  for (int i = heap_size - 1; i > 0; i--) {
    i32 tmp = out[0]; out[0] = out[i]; out[i] = tmp;
    HEAP_SIFT_DOWN(0, i);
  }

  #undef HEAP_SIFT_UP
  #undef HEAP_SIFT_DOWN

  *k_used = heap_size;
  return SQLITE_OK;

#else
  // Original: O(n*k) repeated linear scan with bitmap.
  bitmap_clear(bTaken, n);

  for (int ik = 0; ik < k; ik++) {
    // first index that is a candidate and has not been emitted yet
    int best = 0;
    while (best < n &&
           (bitmap_get(bTaken, best) || !bitmap_get(candidates, best))) {
      best++;
    }
    if (best >= n) {
      // ran out of candidates before filling k slots
      *k_used = ik;
      return SQLITE_OK;
    }

    // Everything before `best` was just shown to be ineligible, so the scan
    // for a smaller distance can start right after it. `<=` means that among
    // equal distances the highest eligible index wins.
    for (int i = best + 1; i < n; i++) {
      if (distances[i] <= distances[best] && !bitmap_get(bTaken, i) &&
          (bitmap_get(candidates, i))) {
        best = i;
      }
    }

    out[ik] = best;
    bitmap_set(bTaken, best, 1);
  }
  *k_used = k;
  return SQLITE_OK;
#endif
}
/**
 * @brief Look up the `data` column of a metadata text shadow table for one
 * rowid.
 *
 * The statement is prepared lazily: when `*stmt` is NULL a new statement is
 * prepared and stored in `*stmt`; otherwise the existing statement is reset
 * and reused. This function never finalizes `*stmt`.
 *
 * @param p            vec0 table; supplies the db handle and the schema/table
 *                     names used to build the query
 * @param stmt         in/out cached statement, see above
 * @param metadata_idx passed as the third format argument of
 *                     VEC0_SHADOW_METADATA_TEXT_DATA_NAME
 * @param rowid        rowid to look up
 * @param n            out: byte count from sqlite3_column_bytes()
 * @param s            out: pointer from sqlite3_column_text(). The memory
 *                     belongs to `*stmt`; per the SQLite documentation it is
 *                     only valid until that statement is stepped, reset or
 *                     finalized again, so copy it if it must outlive the next
 *                     call.
 * @return SQLITE_OK on success; SQLITE_NOMEM if the SQL text could not be
 *         allocated; the sqlite3_prepare_v2()/sqlite3_bind_int64() error code
 *         if either fails; SQLITE_ERROR if stepping does not yield a row.
 */
int vec0_get_metadata_text_long_value(
  vec0_vtab * p,
  sqlite3_stmt ** stmt,
  int metadata_idx,
  i64 rowid,
  int *n,
  char ** s) {
  int rc;

  if(!(*stmt)) {
    char * zSql = sqlite3_mprintf("select data from " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " where rowid = ?", p->schemaName, p->tableName, metadata_idx);
    if(!zSql) {
      return SQLITE_NOMEM;
    }
    rc = sqlite3_prepare_v2(p->db, zSql, -1, stmt, NULL);
    sqlite3_free(zSql);
    if(rc != SQLITE_OK) {
      return rc;
    }
  }

  // drop any row left over from a previous lookup before rebinding
  sqlite3_reset(*stmt);
  rc = sqlite3_bind_int64(*stmt, 1, rowid);
  if(rc != SQLITE_OK) {
    return rc;
  }

  if(sqlite3_step(*stmt) != SQLITE_ROW) {
    return SQLITE_ERROR;
  }

  // text first, then bytes: the order recommended by the SQLite docs
  *s = (char *) sqlite3_column_text(*stmt, 0);
  *n = sqlite3_column_bytes(*stmt, 0);
  return SQLITE_OK;
}
/**
 * @brief Create an "iterator" (sqlite3_stmt) of chunks with the given
 * constraints.
 *
 * Any VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT values in idxStr/argv will be
 * applied as WHERE constraints in the underlying stmt SQL, and any consumer of
 * the stmt can freely step through the stmt with all constraints satisfied.
 *
 * idxStr layout as read here: one leading query-plan char, then 4 chars per
 * argv entry: [0] kind, [1] partition column as 'A' + index, [2] operator,
 * [3] unused placeholder.
 *
 * @param p - vec0_vtab
 * @param idxStr - the xBestIndex/xFilter idxstr containing VEC0_IDXSTR values
 * @param argc - number of argv values from xFilter; asserted to equal the
 *               number of 4-char entries in idxStr
 * @param argv - array of sqlite3_value from xFilter
 * @param outStmt - output sqlite3_stmt of chunks with all filters applied.
 *                  Set to NULL if binding a constraint value fails.
 * @return int SQLITE_OK on success, error code otherwise
 */
int vec0_chunks_iter(vec0_vtab * p, const char * idxStr, int argc, sqlite3_value ** argv, sqlite3_stmt** outStmt) {
  // always null terminated, enforced by SQLite
  int idxStrLength = strlen(idxStr);
  // "1" refers to the initial vec0_query_plan char, 4 is the number of chars per "element"
  int numValueEntries = (idxStrLength-1) / 4;
  assert(argc == numValueEntries);

  int rc;
  sqlite3_str * s = sqlite3_str_new(NULL);
  sqlite3_str_appendf(s, "select chunk_id, validity, rowids "
                         " from " VEC0_SHADOW_CHUNKS_NAME,
                         p->schemaName, p->tableName);

  int appendedWhere = 0;
  for(int i = 0; i < numValueEntries; i++) {
    int idx = 1 + (i * 4);
    char kind = idxStr[idx + 0];
    if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
      continue;
    }

    int partition_idx = idxStr[idx + 1] - 'A';
    int op = idxStr[idx + 2];
    // idxStr[idx + 3] is an unused placeholder

    // SQL comparison for this operator, NULL when unrecognized
    const char * zOp;
    switch(op) {
      case VEC0_PARTITION_OPERATOR_EQ: zOp = "=";  break;
      case VEC0_PARTITION_OPERATOR_GT: zOp = ">";  break;
      case VEC0_PARTITION_OPERATOR_LE: zOp = "<="; break;
      case VEC0_PARTITION_OPERATOR_LT: zOp = "<";  break;
      case VEC0_PARTITION_OPERATOR_GE: zOp = ">="; break;
      case VEC0_PARTITION_OPERATOR_NE: zOp = "!="; break;
      default:                         zOp = NULL; break;
    }
    if(!zOp) {
      sqlite3_free(sqlite3_str_finish(s));
      return SQLITE_ERROR;
    }

    sqlite3_str_appendall(s, appendedWhere ? " AND " : " WHERE ");
    appendedWhere = 1;
    sqlite3_str_appendf(s, " partition%02d %s ? ", partition_idx, zOp);
  }

  char *zSql = sqlite3_str_finish(s);
  if (!zSql) {
    return SQLITE_NOMEM;
  }

  rc = sqlite3_prepare_v2(p->db, zSql, -1, outStmt, NULL);
  sqlite3_free(zSql);
  if(rc != SQLITE_OK) {
    return rc;
  }

  // Bind in the same order the "?" placeholders were appended above.
  int n = 1;
  for(int i = 0; i < numValueEntries; i++) {
    int idx = 1 + (i * 4);
    char kind = idxStr[idx + 0];
    if(kind != VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT) {
      continue;
    }
    rc = sqlite3_bind_value(*outStmt, n++, argv[i]);
    if(rc != SQLITE_OK) {
      // never hand back a statement with unbound constraints
      sqlite3_finalize(*outStmt);
      *outStmt = NULL;
      return rc;
    }
  }

  return SQLITE_OK;
}
// A single `xxx in (...)` constraint on a metadata column. TEXT or INTEGER only for now.
struct Vec0MetadataIn{
// index i of the argv[i] value the constraint is on
int argv_idx;
// metadata column index of the constraint, derived from idxStr + argv_idx
int metadata_idx;
// array of the copied `(...)` values from sqlite3_vtab_in_first()/sqlite3_vtab_in_next()
struct Array array;
};
// Array element for the `xxx in (...)` values of a text column: basically just a string.
struct Vec0MetadataInTextEntry {
// presumably the length of zString in bytes -- TODO confirm against the code that fills it
int n;
// the string data. NOTE(review): ownership and NUL-termination are not visible here; confirm at the allocation site
char * zString;
};
int vec0_metadata_filter_text(vec0_vtab * p, sqlite3_value * value, const void * buffer, int size, vec0_metadata_operator op, u8* b, int metadata_idx, int chunk_rowid, struct Array * aMetadataIn, int argv_idx) {
int rc;
sqlite3_stmt * stmt = NULL;
i64 * rowids = NULL;
sqlite3_blob * rowidsBlob;
const char * sTarget = (const char *) sqlite3_value_text(value);
int nTarget = sqlite3_value_bytes(value);
// TODO(perf): only text metadata news the rowids BLOB. Make it so that
// rowids BLOB is re-used when multiple fitlers on text columns,
// ex "name BETWEEN 'a' and 'b'""
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids", chunk_rowid, 0, &rowidsBlob);
if(rc != SQLITE_OK) {
return rc;
}
assert(sqlite3_blob_bytes(rowidsBlob) % sizeof(i64) == 0);
assert((sqlite3_blob_bytes(rowidsBlob) / sizeof(i64)) == size);
rowids = sqlite3_malloc(sqlite3_blob_bytes(rowidsBlob));
if(!rowids) {
sqlite3_blob_close(rowidsBlob);
return SQLITE_NOMEM;
}
rc = sqlite3_blob_read(rowidsBlob, rowids, sqlite3_blob_bytes(rowidsBlob), 0);
if(rc != SQLITE_OK) {
sqlite3_blob_close(rowidsBlob);
return rc;
}
sqlite3_blob_close(rowidsBlob);
2024-04-20 13:38:58 -07:00
switch(op) {
int nPrefix;
char * sPrefix;
char *sFull;
int nFull;
u8 * view;
case VEC0_METADATA_OPERATOR_EQ: {
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
// for EQ the text lengths must match
if(nPrefix != nTarget) {
bitmap_set(b, i, 0);
continue;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
// for short strings, use the prefix comparison direclty
if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
bitmap_set(b, i, cmpPrefix == 0);
continue;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
// for EQ on longs strings, the prefix must match
if(cmpPrefix) {
bitmap_set(b, i, 0);
continue;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
// consult the full string
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
goto done;
}
if(nPrefix != nFull) {
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = SQLITE_ERROR;
goto done;
}
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) == 0);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_NE: {
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// for NE if text lengths dont match, it never will
if(nPrefix != nTarget) {
bitmap_set(b, i, 1);
continue;
}
int cmpPrefix = strncmp(sPrefix, sTarget, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
// for short strings, use the prefix comparison directly
if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
bitmap_set(b, i, cmpPrefix != 0);
continue;
}
// for NE on long strings, if the prefixes don't match, then the full strings won't either
if(cmpPrefix) {
bitmap_set(b, i, 1);
continue;
}
// consult the full string
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
goto done;
}
if(nPrefix != nFull) {
rc = SQLITE_ERROR;
goto done;
}
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) != 0);
}
break;
}
case VEC0_METADATA_OPERATOR_GT: {
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
// if prefix match, check which is longer
if(cmpPrefix == 0) {
bitmap_set(b, i, nPrefix > nTarget);
}
else {
bitmap_set(b, i, cmpPrefix > 0);
}
continue;
}
// TODO(perf): may not need to compare full text in some cases
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
goto done;
}
if(nPrefix != nFull) {
rc = SQLITE_ERROR;
goto done;
}
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) > 0);
}
break;
}
case VEC0_METADATA_OPERATOR_GE: {
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
// if prefix match, check which is longer
if(cmpPrefix == 0) {
bitmap_set(b, i, nPrefix >= nTarget);
}
else {
bitmap_set(b, i, cmpPrefix >= 0);
}
continue;
}
// TODO(perf): may not need to compare full text in some cases
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
goto done;
}
if(nPrefix != nFull) {
rc = SQLITE_ERROR;
goto done;
}
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) >= 0);
}
break;
}
case VEC0_METADATA_OPERATOR_LE: {
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
2024-04-20 13:38:58 -07:00
if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
// if prefix match, check which is longer
if(cmpPrefix == 0) {
bitmap_set(b, i, nPrefix <= nTarget);
}
else {
bitmap_set(b, i, cmpPrefix <= 0);
}
continue;
}
// TODO(perf): may not need to compare full text in some cases
2024-04-20 13:38:58 -07:00
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
goto done;
}
if(nPrefix != nFull) {
rc = SQLITE_ERROR;
goto done;
}
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) <= 0);
}
break;
2024-07-05 12:07:45 -07:00
}
case VEC0_METADATA_OPERATOR_LT: {
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
int cmpPrefix = strncmp(sPrefix, sTarget, min(min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH), nTarget));
if(nPrefix < VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
// if prefix match, check which is longer
if(cmpPrefix == 0) {
bitmap_set(b, i, nPrefix < nTarget);
}
else {
bitmap_set(b, i, cmpPrefix < 0);
}
continue;
}
// TODO(perf): may not need to compare full text in some cases
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
goto done;
}
if(nPrefix != nFull) {
rc = SQLITE_ERROR;
goto done;
}
bitmap_set(b, i, strncmp(sFull, sTarget, nFull) < 0);
2024-04-20 13:38:58 -07:00
}
break;
2024-04-20 13:38:58 -07:00
}
2024-07-05 12:07:45 -07:00
case VEC0_METADATA_OPERATOR_IN: {
size_t metadataInIdx = -1;
for(size_t i = 0; i < aMetadataIn->length; i++) {
struct Vec0MetadataIn * metadataIn = &(((struct Vec0MetadataIn *) aMetadataIn->z)[i]);
if(metadataIn->argv_idx == argv_idx) {
metadataInIdx = i;
break;
}
}
if(metadataInIdx < 0) {
rc = SQLITE_ERROR;
goto done;
}
2024-04-20 13:38:58 -07:00
struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
struct Array * aTarget = &(metadataIn->array);
2024-06-25 08:52:48 -07:00
int nPrefix;
char * sPrefix;
char *sFull;
int nFull;
u8 * view;
for(int i = 0; i < size; i++) {
view = &((u8*) buffer)[i * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
nPrefix = ((int*) view)[0];
sPrefix = (char *) &view[4];
for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
struct Vec0MetadataInTextEntry * entry = &(((struct Vec0MetadataInTextEntry*)aTarget->z)[target_idx]);
if(entry->n != nPrefix) {
continue;
}
int cmpPrefix = strncmp(sPrefix, entry->zString, min(nPrefix, VEC0_METADATA_TEXT_VIEW_DATA_LENGTH));
if(nPrefix <= VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
if(cmpPrefix == 0) {
bitmap_set(b, i, 1);
break;
}
continue;
}
if(cmpPrefix) {
continue;
}
2024-07-05 12:07:45 -07:00
rc = vec0_get_metadata_text_long_value(p, &stmt, metadata_idx, rowids[i], &nFull, &sFull);
if(rc != SQLITE_OK) {
goto done;
}
if(nPrefix != nFull) {
rc = SQLITE_ERROR;
goto done;
}
if(strncmp(sFull, entry->zString, nFull) == 0) {
bitmap_set(b, i, 1);
break;
}
}
}
break;
}
2024-07-05 12:07:45 -07:00
}
rc = SQLITE_OK;
2024-07-05 12:07:45 -07:00
done:
sqlite3_finalize(stmt);
sqlite3_free(rowids);
return rc;
2024-07-05 12:07:45 -07:00
2024-07-31 12:56:09 -07:00
}
2024-04-20 13:38:58 -07:00
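/**
* @brief Fill in a bitmap of chunk values, marking whether or not each value matches a metadata constraint
*
* @param p vec0_vtab
* @param metadata_idx index of the metadata column to perform the constraint on
* @param op comparison operator of the constraint (EQ, NE, GT, GE, LT, LE or IN)
* @param value sqlite3_value of the constraint's value
* @param blob sqlite3_blob that is already opened on the metadata column's shadow chunk table
* @param chunk_rowid rowid of the chunk to calculate on
* @param b pre-allocated and zeroed-out bitmap to write results to
* @param size size of the chunk
* @param aMetadataIn array of struct Vec0MetadataIn entries, consulted for IN constraints
* @param argv_idx used to locate this constraint's entry in aMetadataIn (matched against each entry's argv_idx)
* @return int SQLITE_OK on success, error code otherwise
*/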
int vec0_set_metadata_filter_bitmap(
vec0_vtab *p,
int metadata_idx,
vec0_metadata_operator op,
sqlite3_value * value,
sqlite3_blob * blob,
i64 chunk_rowid,
u8* b,
int size,
struct Array * aMetadataIn, int argv_idx) {
// TODO: shouldn't this skip in-valid entries from the chunk's validity bitmap?
int rc;
rc = sqlite3_blob_reopen(blob, chunk_rowid);
if(rc != SQLITE_OK) {
return rc;
}
vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;
int szMatch = 0;
int blobSize = sqlite3_blob_bytes(blob);
switch(kind) {
case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
szMatch = blobSize == size / CHAR_BIT;
break;
}
case VEC0_METADATA_COLUMN_KIND_INTEGER: {
szMatch = blobSize == size * sizeof(i64);
break;
}
case VEC0_METADATA_COLUMN_KIND_FLOAT: {
szMatch = blobSize == size * sizeof(double);
break;
}
case VEC0_METADATA_COLUMN_KIND_TEXT: {
szMatch = blobSize == size * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH;
break;
}
}
if(!szMatch) {
return SQLITE_ERROR;
}
void * buffer = sqlite3_malloc(blobSize);
if(!buffer) {
return SQLITE_NOMEM;
}
rc = sqlite3_blob_read(blob, buffer, blobSize, 0);
if(rc != SQLITE_OK) {
goto done;
}
switch(kind) {
case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
int target = sqlite3_value_int(value);
if( (target && op == VEC0_METADATA_OPERATOR_EQ) || (!target && op == VEC0_METADATA_OPERATOR_NE)) {
for(int i = 0; i < size; i++) { bitmap_set(b, i, bitmap_get((u8*) buffer, i)); }
}
else {
for(int i = 0; i < size; i++) { bitmap_set(b, i, !bitmap_get((u8*) buffer, i)); }
}
break;
}
case VEC0_METADATA_COLUMN_KIND_INTEGER: {
i64 * array = (i64*) buffer;
i64 target = sqlite3_value_int64(value);
switch(op) {
case VEC0_METADATA_OPERATOR_EQ: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
break;
}
case VEC0_METADATA_OPERATOR_GT: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
break;
}
case VEC0_METADATA_OPERATOR_LE: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
break;
}
case VEC0_METADATA_OPERATOR_LT: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
break;
}
case VEC0_METADATA_OPERATOR_GE: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
break;
}
case VEC0_METADATA_OPERATOR_NE: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
break;
}
case VEC0_METADATA_OPERATOR_IN: {
int metadataInIdx = -1;
for(size_t i = 0; i < aMetadataIn->length; i++) {
struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
if(metadataIn->argv_idx == argv_idx) {
metadataInIdx = i;
break;
}
}
if(metadataInIdx < 0) {
rc = SQLITE_ERROR;
goto done;
}
struct Vec0MetadataIn * metadataIn = &((struct Vec0MetadataIn *) aMetadataIn->z)[metadataInIdx];
struct Array * aTarget = &(metadataIn->array);
for(int i = 0; i < size; i++) {
for(size_t target_idx = 0; target_idx < aTarget->length; target_idx++) {
if( ((i64*)aTarget->z)[target_idx] == array[i]) {
bitmap_set(b, i, 1);
break;
}
}
}
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
}
break;
}
case VEC0_METADATA_COLUMN_KIND_FLOAT: {
double * array = (double*) buffer;
double target = sqlite3_value_double(value);
switch(op) {
case VEC0_METADATA_OPERATOR_EQ: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] == target); }
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_GT: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] > target); }
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_LE: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] <= target); }
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_LT: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] < target); }
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_GE: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] >= target); }
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_NE: {
for(int i = 0; i < size; i++) { bitmap_set(b, i, array[i] != target); }
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
case VEC0_METADATA_OPERATOR_IN: {
// should never be reached
break;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
}
break;
}
case VEC0_METADATA_COLUMN_KIND_TEXT: {
rc = vec0_metadata_filter_text(p, value, buffer, size, op, b, metadata_idx, chunk_rowid, aMetadataIn, argv_idx);
if(rc != SQLITE_OK) {
goto done;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
break;
}
}
done:
sqlite3_free(buffer);
return rc;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
struct VectorColumnDefinition *vector_column,
int vectorColumnIdx, struct Array *arrayRowidsIn,
struct Array * aMetadataIn,
const char * idxStr, int argc, sqlite3_value ** argv,
void *queryVector, i64 k, i64 **out_topk_rowids,
f32 **out_topk_distances, i64 *out_used) {
// for each chunk, get top min(k, chunk_size) rowid + distances to query vec.
// then reconcile all topk_chunks for a true top k.
// output only rowids + distances for now
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int rc = SQLITE_OK;
sqlite3_blob *blobVectors = NULL;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
void *baseVectors = NULL; // memory: chunk_size * dimensions * element_size
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// OWNED BY CALLER ON SUCCESS
i64 *topk_rowids = NULL; // memory: k * 8 (i64 elements)
// OWNED BY CALLER ON SUCCESS
f32 *topk_distances = NULL; // memory: k * 4
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
i64 *tmp_topk_rowids = NULL; // memory: k * 8 (i64 elements)
f32 *tmp_topk_distances = NULL; // memory: k * 4
f32 *chunk_distances = NULL; // memory: chunk_size * 4
u8 *b = NULL; // memory: chunk_size / 8
u8 *bTaken = NULL; // memory: chunk_size / 8
i32 *chunk_topk_idxs = NULL; // memory: k * 4
u8 *bmRowids = NULL; // memory: chunk_size / 8
u8 *bmMetadata = NULL; // memory: chunk_size / 8
// // total: a lot???
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
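// NOTE(review): this looks like a rough byte estimate for the scratch buffers
// allocated below (top-k arrays sized by k, bitmaps and vectors sized by
// chunk_size) -- confirm; the terms do not map one-to-one onto the allocations.
// 6 * (k * 4) + (k * 2) + (chunk_size / 8) + (chunk_size * dimensions * 4)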
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
topk_rowids = sqlite3_malloc(k * sizeof(i64));
if (!topk_rowids) {
rc = SQLITE_NOMEM;
goto cleanup;
}
memset(topk_rowids, 0, k * sizeof(i64));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
topk_distances = sqlite3_malloc(k * sizeof(f32));
if (!topk_distances) {
rc = SQLITE_NOMEM;
goto cleanup;
}
memset(topk_distances, 0, k * sizeof(f32));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
tmp_topk_rowids = sqlite3_malloc(k * sizeof(i64));
if (!tmp_topk_rowids) {
rc = SQLITE_NOMEM;
goto cleanup;
}
memset(tmp_topk_rowids, 0, k * sizeof(i64));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
tmp_topk_distances = sqlite3_malloc(k * sizeof(f32));
if (!tmp_topk_distances) {
rc = SQLITE_NOMEM;
goto cleanup;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
memset(tmp_topk_distances, 0, k * sizeof(f32));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
i64 k_used = 0;
i64 baseVectorsSize = p->chunk_size * vector_column_byte_size(*vector_column);
baseVectors = sqlite3_malloc(baseVectorsSize);
if (!baseVectors) {
rc = SQLITE_NOMEM;
goto cleanup;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
chunk_distances = sqlite3_malloc(p->chunk_size * sizeof(f32));
if (!chunk_distances) {
rc = SQLITE_NOMEM;
goto cleanup;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
b = bitmap_new(p->chunk_size);
if (!b) {
rc = SQLITE_NOMEM;
goto cleanup;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
bTaken = bitmap_new(p->chunk_size);
if (!bTaken) {
rc = SQLITE_NOMEM;
goto cleanup;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
chunk_topk_idxs = sqlite3_malloc(k * sizeof(i32));
if (!chunk_topk_idxs) {
rc = SQLITE_NOMEM;
goto cleanup;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
bmRowids = arrayRowidsIn ? bitmap_new(p->chunk_size) : NULL;
if (arrayRowidsIn && !bmRowids) {
rc = SQLITE_NOMEM;
goto cleanup;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
sqlite3_blob * metadataBlobs[VEC0_MAX_METADATA_COLUMNS];
memset(metadataBlobs, 0, sizeof(sqlite3_blob*) * VEC0_MAX_METADATA_COLUMNS);
bmMetadata = bitmap_new(p->chunk_size);
if(!bmMetadata) {
rc = SQLITE_NOMEM;
goto cleanup;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
int idxStrLength = strlen(idxStr);
int numValueEntries = (idxStrLength-1) / 4;
assert(numValueEntries == argc);
int hasMetadataFilters = 0;
int hasDistanceConstraints = 0;
for(int i = 0; i < argc; i++) {
int idx = 1 + (i * 4);
char kind = idxStr[idx + 0];
if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
hasMetadataFilters = 1;
}
else if(kind == VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
hasDistanceConstraints = 1;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
while (true) {
rc = sqlite3_step(stmtChunks);
if (rc == SQLITE_DONE) {
2024-07-05 12:07:45 -07:00
break;
}
if (rc != SQLITE_ROW) {
vtab_set_error(&p->base, "chunks iter error");
rc = SQLITE_ERROR;
goto cleanup;
}
memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
memset(chunk_topk_idxs, 0, k * sizeof(i32));
bitmap_clear(b, p->chunk_size);
i64 chunk_id = sqlite3_column_int64(stmtChunks, 0);
unsigned char *chunkValidity =
(unsigned char *)sqlite3_column_blob(stmtChunks, 1);
i64 validitySize = sqlite3_column_bytes(stmtChunks, 1);
if (validitySize != p->chunk_size / CHAR_BIT) {
// IMP: V05271_22109
vtab_set_error(
&p->base,
"chunk validity size doesn't match - expected %lld, found %lld",
p->chunk_size / CHAR_BIT, validitySize);
rc = SQLITE_ERROR;
goto cleanup;
}
i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
i64 rowidsSize = sqlite3_column_bytes(stmtChunks, 2);
if (rowidsSize != p->chunk_size * sizeof(i64)) {
// IMP: V02796_19635
vtab_set_error(&p->base, "rowids size doesn't match");
vtab_set_error(
&p->base,
"chunk rowids size doesn't match - expected %lld, found %lld",
p->chunk_size * sizeof(i64), rowidsSize);
rc = SQLITE_ERROR;
goto cleanup;
}
// open the vector chunk blob for the current chunk
rc = sqlite3_blob_open(p->db, p->schemaName,
p->shadowVectorChunksNames[vectorColumnIdx],
"vectors", chunk_id, 0, &blobVectors);
if (rc != SQLITE_OK) {
vtab_set_error(&p->base, "could not open vectors blob for chunk %lld",
chunk_id);
rc = SQLITE_ERROR;
goto cleanup;
}
i64 currentBaseVectorsSize = sqlite3_blob_bytes(blobVectors);
i64 expectedBaseVectorsSize =
p->chunk_size * vector_column_byte_size(*vector_column);
if (currentBaseVectorsSize != expectedBaseVectorsSize) {
// IMP: V16465_00535
vtab_set_error(
&p->base,
"vectors blob size doesn't match - expected %lld, found %lld",
expectedBaseVectorsSize, currentBaseVectorsSize);
rc = SQLITE_ERROR;
goto cleanup;
}
rc = sqlite3_blob_read(blobVectors, baseVectors, currentBaseVectorsSize, 0);
if (rc != SQLITE_OK) {
vtab_set_error(&p->base, "vectors blob read error for %lld", chunk_id);
rc = SQLITE_ERROR;
goto cleanup;
}
bitmap_copy(b, chunkValidity, p->chunk_size);
if (arrayRowidsIn) {
bitmap_clear(bmRowids, p->chunk_size);
for (int i = 0; i < p->chunk_size; i++) {
if (!bitmap_get(chunkValidity, i)) {
continue;
}
i64 rowid = chunkRowids[i];
void *in = bsearch(&rowid, arrayRowidsIn->z, arrayRowidsIn->length,
sizeof(i64), _cmp);
bitmap_set(bmRowids, i, in ? 1 : 0);
}
bitmap_and_inplace(b, bmRowids, p->chunk_size);
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(hasMetadataFilters) {
for(int i = 0; i < argc; i++) {
int idx = 1 + (i * 4);
char kind = idxStr[idx + 0];
if(kind != VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
continue;
}
int metadata_idx = idxStr[idx + 1] - 'A';
int operator = idxStr[idx + 2];
if(!metadataBlobs[metadata_idx]) {
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 0, &metadataBlobs[metadata_idx]);
vtab_set_error(&p->base, "Could not open metadata blob");
if(rc != SQLITE_OK) {
goto cleanup;
}
}
bitmap_clear(bmMetadata, p->chunk_size);
rc = vec0_set_metadata_filter_bitmap(p, metadata_idx, operator, argv[i], metadataBlobs[metadata_idx], chunk_id, bmMetadata, p->chunk_size, aMetadataIn, i);
if(rc != SQLITE_OK) {
vtab_set_error(&p->base, "Could not filter metadata fields");
if(rc != SQLITE_OK) {
goto cleanup;
}
}
bitmap_and_inplace(b, bmMetadata, p->chunk_size);
}
}
2024-07-05 12:07:45 -07:00
for (int i = 0; i < p->chunk_size; i++) {
if (!bitmap_get(b, i)) {
continue;
};
f32 result;
switch (vector_column->element_type) {
case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
const f32 *base_i =
((f32 *)baseVectors) + (i * vector_column->dimensions);
switch (vector_column->distance_metric) {
case VEC0_DISTANCE_METRIC_L2: {
result = distance_l2_sqr_float(base_i, (f32 *)queryVector,
&vector_column->dimensions);
break;
}
2024-07-23 14:04:17 -07:00
case VEC0_DISTANCE_METRIC_L1: {
result = distance_l1_f32(base_i, (f32 *)queryVector,
2024-07-23 23:57:42 -07:00
&vector_column->dimensions);
2024-07-23 14:04:17 -07:00
break;
}
2024-07-05 12:07:45 -07:00
case VEC0_DISTANCE_METRIC_COSINE: {
result = distance_cosine_float(base_i, (f32 *)queryVector,
&vector_column->dimensions);
break;
}
}
break;
}
case SQLITE_VEC_ELEMENT_TYPE_INT8: {
const i8 *base_i =
((i8 *)baseVectors) + (i * vector_column->dimensions);
switch (vector_column->distance_metric) {
case VEC0_DISTANCE_METRIC_L2: {
result = distance_l2_sqr_int8(base_i, (i8 *)queryVector,
&vector_column->dimensions);
break;
}
2024-07-23 14:04:17 -07:00
case VEC0_DISTANCE_METRIC_L1: {
result = distance_l1_int8(base_i, (i8 *)queryVector,
2024-07-23 23:57:42 -07:00
&vector_column->dimensions);
2024-07-23 14:04:17 -07:00
break;
}
2024-07-05 12:07:45 -07:00
case VEC0_DISTANCE_METRIC_COSINE: {
result = distance_cosine_int8(base_i, (i8 *)queryVector,
&vector_column->dimensions);
break;
}
}
break;
}
case SQLITE_VEC_ELEMENT_TYPE_BIT: {
const u8 *base_i =
((u8 *)baseVectors) + (i * (vector_column->dimensions / CHAR_BIT));
result = distance_hamming(base_i, (u8 *)queryVector,
&vector_column->dimensions);
break;
}
}
chunk_distances[i] = result;
}
if(hasDistanceConstraints) {
for(int i = 0; i < argc; i++) {
int idx = 1 + (i * 4);
char kind = idxStr[idx + 0];
// TODO casts f64 to f32, is that a problem?
f32 target = (f32) sqlite3_value_double(argv[i]);
if(kind != VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
continue;
}
vec0_distance_constraint_operator op = idxStr[idx + 1];
switch(op) {
case VEC0_DISTANCE_CONSTRAINT_GE: {
for(int i = 0; i < p->chunk_size;i++) {
if(bitmap_get(b, i) && !(chunk_distances[i] >= target)) {
bitmap_set(b, i, 0);
}
}
break;
}
case VEC0_DISTANCE_CONSTRAINT_GT: {
for(int i = 0; i < p->chunk_size;i++) {
if(bitmap_get(b, i) && !(chunk_distances[i] > target)) {
bitmap_set(b, i, 0);
}
}
break;
}
case VEC0_DISTANCE_CONSTRAINT_LE: {
for(int i = 0; i < p->chunk_size;i++) {
if(bitmap_get(b, i) && !(chunk_distances[i] <= target)) {
bitmap_set(b, i, 0);
}
}
break;
}
case VEC0_DISTANCE_CONSTRAINT_LT: {
for(int i = 0; i < p->chunk_size;i++) {
if(bitmap_get(b, i) && !(chunk_distances[i] < target)) {
bitmap_set(b, i, 0);
}
}
break;
}
}
}
}
2024-07-05 12:07:45 -07:00
int used1;
min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
min(k, p->chunk_size), bTaken, &used1);
i64 used;
merge_sorted_lists(topk_distances, topk_rowids, k_used, chunk_distances,
chunkRowids, chunk_topk_idxs,
min(min(k, p->chunk_size), used1), tmp_topk_distances,
tmp_topk_rowids, k, &used);
for (int i = 0; i < used; i++) {
topk_rowids[i] = tmp_topk_rowids[i];
topk_distances[i] = tmp_topk_distances[i];
}
k_used = used;
2024-07-23 23:57:42 -07:00
// blobVectors is always opened with read-only permissions, so this never
// fails.
2024-07-05 12:07:45 -07:00
sqlite3_blob_close(blobVectors);
blobVectors = NULL;
}
*out_topk_rowids = topk_rowids;
*out_topk_distances = topk_distances;
*out_used = k_used;
rc = SQLITE_OK;
cleanup:
if (rc != SQLITE_OK) {
sqlite3_free(topk_rowids);
sqlite3_free(topk_distances);
}
sqlite3_free(chunk_topk_idxs);
sqlite3_free(tmp_topk_rowids);
sqlite3_free(tmp_topk_distances);
sqlite3_free(b);
sqlite3_free(bTaken);
sqlite3_free(bmRowids);
sqlite3_free(baseVectors);
sqlite3_free(chunk_distances);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
sqlite3_free(bmMetadata);
for(int i = 0; i < VEC0_MAX_METADATA_COLUMNS; i++) {
sqlite3_blob_close(metadataBlobs[i]);
}
2024-07-23 23:57:42 -07:00
// blobVectors is always opened with read-only permissions, so this never
// fails.
2024-07-05 12:07:45 -07:00
sqlite3_blob_close(blobVectors);
return rc;
}
#if SQLITE_VEC_ENABLE_RESCORE
#include "sqlite-vec-rescore.c"
#endif
#if SQLITE_VEC_ENABLE_DISKANN
/**
 * Handle a KNN query using the DiskANN graph search.
 *
 * The query vector and `k` are located in `argv` by scanning `idxStr`, which
 * is read as one leading character followed by 4 characters per argument,
 * the first of which is the argument "kind". The graph results returned by
 * diskann_search() are then merged with every row of the column's DiskANN
 * buffer shadow table, sorted by ascending distance, and handed to the
 * cursor as its `knn_data`.
 *
 * @param pCur    Cursor that receives the results (`knn_data`, `query_plan`).
 * @param p       The vec0 virtual table.
 * @param idxNum  Used as the index of the vector column being searched.
 * @param idxStr  Query plan string produced for this query.
 * @param argc    Number of entries in `argv`.
 * @param argv    Constraint values; must contain a MATCH and a K entry.
 *
 * @return SQLITE_OK on success (ownership of the result arrays moves to
 *         `pCur->knn_data`), SQLITE_NOMEM on allocation failure,
 *         SQLITE_ERROR for an invalid query vector, an oversized `k`, or a
 *         malformed/unreadable buffer row, or the error code returned by
 *         diskann_search().
 */
static int vec0Filter_knn_diskann(
    vec0_cursor *pCur, vec0_vtab *p, int idxNum,
    const char *idxStr, int argc, sqlite3_value **argv) {
  int rc;
  int vectorColumnIdx = idxNum;
  struct VectorColumnDefinition *vector_column =
      &p->vector_columns[vectorColumnIdx];

  // Everything below is released at `cleanup:` unless ownership has been
  // transferred to the cursor (in which case the local is reset to NULL).
  struct vec0_query_knn_data *knn_data = NULL;
  void *queryVector = NULL;
  vector_cleanup queryVectorCleanup = vector_cleanup_noop;
  i64 *resultRowids = NULL;
  f32 *resultDistances = NULL;
  sqlite3_stmt *bufStmt = NULL;
  char *zSql = NULL;
  int resultCount = 0;
  size_t dimensions;
  enum VectorElementType elementType;
  char *pzError;

  // sqlite3_malloc() takes an `int` byte count and diskann_search() takes an
  // `int` k, so k must be small enough that `k * sizeof(i64)` fits in an int.
  const i64 kMax = (i64)(INT_MAX / sizeof(i64));

  knn_data = sqlite3_malloc(sizeof(*knn_data));
  if (!knn_data) {
    return SQLITE_NOMEM;
  }
  memset(knn_data, 0, sizeof(*knn_data));

  // Parse query_idx and k_idx from idxStr
  int query_idx = -1;
  int k_idx = -1;
  for (int i = 0; i < argc; i++) {
    char kind = idxStr[1 + (i * 4)];
    if (kind == VEC0_IDXSTR_KIND_KNN_MATCH) {
      query_idx = i;
    }
    if (kind == VEC0_IDXSTR_KIND_KNN_K) {
      k_idx = i;
    }
  }
  assert(query_idx >= 0);
  assert(k_idx >= 0);

  // Extract query vector
  rc = vector_from_value(argv[query_idx], &queryVector, &dimensions,
                         &elementType, &queryVectorCleanup, &pzError);
  if (rc != SQLITE_OK) {
    // "%z" hands pzError to the formatter, which frees it.
    vtab_set_error(&p->base, "Invalid query vector: %z", pzError);
    // As before, the cleanup callback is not invoked when extraction fails.
    sqlite3_free(knn_data);
    return SQLITE_ERROR;
  }
  if (elementType != vector_column->element_type ||
      dimensions != vector_column->dimensions) {
    vtab_set_error(&p->base, "Query vector type/dimension mismatch");
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  i64 k = sqlite3_value_int64(argv[k_idx]);
  if (k <= 0) {
    // Nothing requested: hand back an empty result set.
    knn_data->k = 0;
    knn_data->k_used = 0;
    pCur->knn_data = knn_data;
    pCur->query_plan = VEC0_QUERY_PLAN_KNN;
    knn_data = NULL; // now owned by the cursor
    rc = SQLITE_OK;
    goto cleanup;
  }
  if (k > kMax) {
    // Without this guard the allocation sizes below would be truncated when
    // narrowed to `int`, and `(int)k` would no longer equal k.
    vtab_set_error(
        &p->base,
        "k value in knn query too large, provided %lld and the limit is %lld",
        k, kMax);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  // Run DiskANN search
  resultRowids = sqlite3_malloc((int)(k * sizeof(i64)));
  resultDistances = sqlite3_malloc((int)(k * sizeof(f32)));
  if (!resultRowids || !resultDistances) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  // NOTE(review): the meaning of the literal 0 argument is not visible from
  // here - confirm against diskann_search()'s declaration.
  rc = diskann_search(p, vectorColumnIdx, queryVector, dimensions,
                      elementType, (int)k, 0,
                      resultRowids, resultDistances, &resultCount);
  if (rc != SQLITE_OK) {
    goto cleanup;
  }

  // Scan _diskann_buffer for any buffered (unflushed) vectors and merge
  // with graph results. This ensures no recall loss for buffered vectors.
  zSql = sqlite3_mprintf(
      "SELECT rowid, vector FROM " VEC0_SHADOW_DISKANN_BUFFER_N_NAME,
      p->schemaName, p->tableName, vectorColumnIdx);
  if (!zSql) {
    rc = SQLITE_NOMEM;
    goto cleanup;
  }
  int bufRc = sqlite3_prepare_v2(p->db, zSql, -1, &bufStmt, NULL);
  sqlite3_free(zSql);
  zSql = NULL;

  // A failed prepare is tolerated and the graph results are used alone, as in
  // the original code. NOTE(review): presumably this covers tables without a
  // buffer shadow table - confirm.
  if (bufRc == SQLITE_OK) {
    const i64 expectedBytes = (i64)vector_column_byte_size(*vector_column);
    int stepRc;
    while ((stepRc = sqlite3_step(bufStmt)) == SQLITE_ROW) {
      i64 bufRowid = sqlite3_column_int64(bufStmt, 0);
      const void *bufVec = sqlite3_column_blob(bufStmt, 1);
      i64 bufBytes = sqlite3_column_bytes(bufStmt, 1);

      // The distance routine reads a full vector from bufVec, so a NULL or
      // wrongly sized blob must be rejected before it is dereferenced.
      if (!bufVec || bufBytes != expectedBytes) {
        vtab_set_error(
            &p->base,
            "diskann buffer vector size doesn't match for rowid %lld - "
            "expected %lld, found %lld",
            bufRowid, expectedBytes, bufBytes);
        rc = SQLITE_ERROR;
        goto cleanup;
      }

      f32 dist = vec0_distance_full(
          queryVector, bufVec, dimensions, elementType,
          vector_column->distance_metric);

      // Check if this buffer vector should replace the worst graph result
      if (resultCount < (int)k) {
        // Still have room, just add it
        resultRowids[resultCount] = bufRowid;
        resultDistances[resultCount] = dist;
        resultCount++;
      } else {
        // Find worst (largest distance) in results
        int worstIdx = 0;
        for (int wi = 1; wi < resultCount; wi++) {
          if (resultDistances[wi] > resultDistances[worstIdx]) {
            worstIdx = wi;
          }
        }
        if (dist < resultDistances[worstIdx]) {
          resultRowids[worstIdx] = bufRowid;
          resultDistances[worstIdx] = dist;
        }
      }
    }
    if (stepRc != SQLITE_DONE) {
      // Do not silently return a partial merge when the scan itself failed.
      vtab_set_error(&p->base, "Error reading diskann buffer: %s",
                     sqlite3_errmsg(p->db));
      rc = SQLITE_ERROR;
      goto cleanup;
    }
  }

  // Sort results by distance (ascending)
  for (int si = 0; si < resultCount - 1; si++) {
    for (int sj = si + 1; sj < resultCount; sj++) {
      if (resultDistances[sj] < resultDistances[si]) {
        f32 tmpD = resultDistances[si];
        resultDistances[si] = resultDistances[sj];
        resultDistances[sj] = tmpD;
        i64 tmpR = resultRowids[si];
        resultRowids[si] = resultRowids[sj];
        resultRowids[sj] = tmpR;
      }
    }
  }

  knn_data->k = resultCount;
  knn_data->k_used = resultCount;
  knn_data->rowids = resultRowids;
  knn_data->distances = resultDistances;
  knn_data->current_idx = 0;
  pCur->knn_data = knn_data;
  pCur->query_plan = VEC0_QUERY_PLAN_KNN;

  // Ownership moved to the cursor; keep cleanup from freeing these.
  knn_data = NULL;
  resultRowids = NULL;
  resultDistances = NULL;
  rc = SQLITE_OK;

cleanup:
  // sqlite3_finalize() and sqlite3_free() are harmless no-ops on NULL.
  sqlite3_finalize(bufStmt);
  queryVectorCleanup(queryVector);
  sqlite3_free(resultRowids);
  sqlite3_free(resultDistances);
  sqlite3_free(knn_data);
  return rc;
}
#endif /* SQLITE_VEC_ENABLE_DISKANN */
2024-04-20 13:38:58 -07:00
int vec0Filter_knn(vec0_cursor *pCur, vec0_vtab *p, int idxNum,
const char *idxStr, int argc, sqlite3_value **argv) {
assert(argc == (strlen(idxStr)-1) / 4);
int rc;
struct vec0_query_knn_data *knn_data;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int vectorColumnIdx = idxNum;
struct VectorColumnDefinition *vector_column =
&p->vector_columns[vectorColumnIdx];
2024-04-20 13:38:58 -07:00
#if SQLITE_VEC_ENABLE_DISKANN
// DiskANN dispatch
if (vector_column->index_type == VEC0_INDEX_TYPE_DISKANN) {
return vec0Filter_knn_diskann(pCur, p, idxNum, idxStr, argc, argv);
}
#endif
struct Array *arrayRowidsIn = NULL;
sqlite3_stmt *stmtChunks = NULL;
void *queryVector;
size_t dimensions;
enum VectorElementType elementType;
vector_cleanup queryVectorCleanup = vector_cleanup_noop;
char *pzError;
knn_data = sqlite3_malloc(sizeof(*knn_data));
if (!knn_data) {
return SQLITE_NOMEM;
2024-04-20 13:38:58 -07:00
}
memset(knn_data, 0, sizeof(*knn_data));
// array of `struct Vec0MetadataIn`, IF there are any `xxx in (...)` metadata constraints
struct Array * aMetadataIn = NULL;
int query_idx =-1;
int k_idx = -1;
int rowid_in_idx = -1;
for(int i = 0; i < argc; i++) {
if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_MATCH) {
query_idx = i;
}
if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_K) {
k_idx = i;
}
if(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_KNN_ROWID_IN) {
rowid_in_idx = i;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
}
assert(query_idx >= 0);
assert(k_idx >= 0);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// make sure the query vector matches the vector column (type dimensions etc.)
rc = vector_from_value(argv[query_idx], &queryVector, &dimensions, &elementType,
&queryVectorCleanup, &pzError);
2024-04-20 13:38:58 -07:00
if (rc != SQLITE_OK) {
vtab_set_error(&p->base,
"Query vector on the \"%.*s\" column is invalid: %z",
vector_column->name_length, vector_column->name, pzError);
rc = SQLITE_ERROR;
goto cleanup;
2024-04-20 13:38:58 -07:00
}
if (elementType != vector_column->element_type) {
vtab_set_error(
&p->base,
"Query vector for the \"%.*s\" column is expected to be of type "
"%s, but a %s vector was provided.",
vector_column->name_length, vector_column->name,
vector_subtype_name(vector_column->element_type),
vector_subtype_name(elementType));
rc = SQLITE_ERROR;
goto cleanup;
2024-04-20 13:38:58 -07:00
}
if (dimensions != vector_column->dimensions) {
vtab_set_error(
&p->base,
"Dimension mismatch for query vector for the \"%.*s\" column. "
"Expected %d dimensions but received %d.",
vector_column->name_length, vector_column->name,
vector_column->dimensions, dimensions);
rc = SQLITE_ERROR;
goto cleanup;
2024-04-20 13:38:58 -07:00
}
i64 k = sqlite3_value_int64(argv[k_idx]);
if (k < 0) {
vtab_set_error(
&p->base, "k value in knn queries must be greater than or equal to 0.");
rc = SQLITE_ERROR;
goto cleanup;
}
#define SQLITE_VEC_VEC0_K_MAX 4096
if (k > SQLITE_VEC_VEC0_K_MAX) {
vtab_set_error(
&p->base,
"k value in knn query too large, provided %lld and the limit is %lld",
k, SQLITE_VEC_VEC0_K_MAX);
rc = SQLITE_ERROR;
goto cleanup;
2024-04-20 13:38:58 -07:00
}
if (k == 0) {
knn_data->k = 0;
pCur->knn_data = knn_data;
pCur->query_plan = VEC0_QUERY_PLAN_KNN;
rc = SQLITE_OK;
goto cleanup;
}
2024-04-20 13:38:58 -07:00
// handle when a `rowid in (...)` operation was provided
// Array of all the rowids that appear in any `rowid in (...)` constraint.
// NULL if none were provided, which means a "full" scan.
#if COMPILER_SUPPORTS_VTAB_IN
if (rowid_in_idx >= 0) {
sqlite3_value *item;
int rc;
arrayRowidsIn = sqlite3_malloc(sizeof(*arrayRowidsIn));
if (!arrayRowidsIn) {
rc = SQLITE_NOMEM;
goto cleanup;
}
memset(arrayRowidsIn, 0, sizeof(*arrayRowidsIn));
2024-04-20 13:38:58 -07:00
rc = array_init(arrayRowidsIn, sizeof(i64), 32);
if (rc != SQLITE_OK) {
goto cleanup;
}
for (rc = sqlite3_vtab_in_first(argv[rowid_in_idx], &item); rc == SQLITE_OK && item;
rc = sqlite3_vtab_in_next(argv[rowid_in_idx], &item)) {
i64 rowid;
if (p->pkIsText) {
rc = vec0_rowid_from_id(p, item, &rowid);
if (rc != SQLITE_OK) {
goto cleanup;
}
} else {
rowid = sqlite3_value_int64(item);
}
rc = array_append(arrayRowidsIn, &rowid);
if (rc != SQLITE_OK) {
goto cleanup;
}
}
if (rc != SQLITE_DONE) {
vtab_set_error(&p->base, "error processing rowid in (...) array");
goto cleanup;
}
qsort(arrayRowidsIn->z, arrayRowidsIn->length, arrayRowidsIn->element_size,
_cmp);
}
#endif
2024-04-20 13:38:58 -07:00
#if COMPILER_SUPPORTS_VTAB_IN
for(int i = 0; i < argc; i++) {
if(!(idxStr[1 + (i*4)] == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT && idxStr[1 + (i*4) + 2] == VEC0_METADATA_OPERATOR_IN)) {
continue;
}
int metadata_idx = idxStr[1 + (i*4) + 1] - 'A';
if(!aMetadataIn) {
aMetadataIn = sqlite3_malloc(sizeof(*aMetadataIn));
if(!aMetadataIn) {
rc = SQLITE_NOMEM;
goto cleanup;
}
memset(aMetadataIn, 0, sizeof(*aMetadataIn));
rc = array_init(aMetadataIn, sizeof(struct Vec0MetadataIn), 8);
if(rc != SQLITE_OK) {
goto cleanup;
}
}
struct Vec0MetadataIn item;
memset(&item, 0, sizeof(item));
item.metadata_idx=metadata_idx;
item.argv_idx = i;
switch(p->metadata_columns[metadata_idx].kind) {
case VEC0_METADATA_COLUMN_KIND_INTEGER: {
rc = array_init(&item.array, sizeof(i64), 16);
if(rc != SQLITE_OK) {
goto cleanup;
}
sqlite3_value *entry;
for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
i64 v = sqlite3_value_int64(entry);
rc = array_append(&item.array, &v);
if (rc != SQLITE_OK) {
goto cleanup;
}
}
if (rc != SQLITE_DONE) {
vtab_set_error(&p->base, "Error fetching next value in `x in (...)` integer expression");
goto cleanup;
}
break;
}
case VEC0_METADATA_COLUMN_KIND_TEXT: {
rc = array_init(&item.array, sizeof(struct Vec0MetadataInTextEntry), 16);
if(rc != SQLITE_OK) {
goto cleanup;
}
sqlite3_value *entry;
for (rc = sqlite3_vtab_in_first(argv[i], &entry); rc == SQLITE_OK && entry; rc = sqlite3_vtab_in_next(argv[i], &entry)) {
const char * s = (const char *) sqlite3_value_text(entry);
int n = sqlite3_value_bytes(entry);
2024-04-20 13:38:58 -07:00
struct Vec0MetadataInTextEntry entry;
entry.zString = sqlite3_mprintf("%.*s", n, s);
if(!entry.zString) {
rc = SQLITE_NOMEM;
goto cleanup;
}
entry.n = n;
rc = array_append(&item.array, &entry);
if (rc != SQLITE_OK) {
goto cleanup;
}
}
2024-04-20 13:38:58 -07:00
if (rc != SQLITE_DONE) {
vtab_set_error(&p->base, "Error fetching next value in `x in (...)` text expression");
goto cleanup;
}
2024-04-20 13:38:58 -07:00
break;
}
default: {
vtab_set_error(&p->base, "Internal sqlite-vec error");
goto cleanup;
}
}
2024-04-20 13:38:58 -07:00
rc = array_append(aMetadataIn, &item);
if(rc != SQLITE_OK) {
goto cleanup;
}
}
#endif
2024-04-20 13:38:58 -07:00
#if SQLITE_VEC_ENABLE_RESCORE
// Dispatch to rescore KNN path if this vector column has rescore enabled
if (vector_column->index_type == VEC0_INDEX_TYPE_RESCORE) {
rc = rescore_knn(p, pCur, vector_column, vectorColumnIdx, arrayRowidsIn,
aMetadataIn, idxStr, argc, argv, queryVector, k, knn_data);
if (rc != SQLITE_OK) {
goto cleanup;
}
pCur->knn_data = knn_data;
pCur->query_plan = VEC0_QUERY_PLAN_KNN;
rc = SQLITE_OK;
goto cleanup;
}
#endif
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// IVF dispatch: if vector column has IVF, use IVF query instead of chunk scan
if (vector_column->index_type == VEC0_INDEX_TYPE_IVF) {
rc = ivf_query_knn(p, vectorColumnIdx, queryVector,
(int)vector_column_byte_size(*vector_column), k, knn_data);
if (rc != SQLITE_OK) {
goto cleanup;
}
pCur->knn_data = knn_data;
pCur->query_plan = VEC0_QUERY_PLAN_KNN;
rc = SQLITE_OK;
goto cleanup;
}
#endif
rc = vec0_chunks_iter(p, idxStr, argc, argv, &stmtChunks);
if (rc != SQLITE_OK) {
// IMP: V06942_23781
vtab_set_error(&p->base, "Error preparing stmtChunk: %s",
sqlite3_errmsg(p->db));
goto cleanup;
}
2024-04-20 13:38:58 -07:00
i64 *topk_rowids = NULL;
f32 *topk_distances = NULL;
i64 k_used = 0;
rc = vec0Filter_knn_chunks_iter(p, stmtChunks, vector_column, vectorColumnIdx,
arrayRowidsIn, aMetadataIn, idxStr, argc, argv, queryVector, k, &topk_rowids,
&topk_distances, &k_used);
if (rc != SQLITE_OK) {
goto cleanup;
}
2024-04-20 13:38:58 -07:00
knn_data->current_idx = 0;
knn_data->k = k;
knn_data->rowids = topk_rowids;
knn_data->distances = topk_distances;
knn_data->k_used = k_used;
2024-04-20 13:38:58 -07:00
pCur->knn_data = knn_data;
pCur->query_plan = VEC0_QUERY_PLAN_KNN;
rc = SQLITE_OK;
cleanup:
sqlite3_finalize(stmtChunks);
array_cleanup(arrayRowidsIn);
sqlite3_free(arrayRowidsIn);
queryVectorCleanup(queryVector);
if(aMetadataIn) {
for(size_t i = 0; i < aMetadataIn->length; i++) {
struct Vec0MetadataIn* item = &((struct Vec0MetadataIn *) aMetadataIn->z)[i];
for(size_t j = 0; j < item->array.length; j++) {
if(p->metadata_columns[item->metadata_idx].kind == VEC0_METADATA_COLUMN_KIND_TEXT) {
struct Vec0MetadataInTextEntry entry = ((struct Vec0MetadataInTextEntry*)item->array.z)[j];
sqlite3_free(entry.zString);
}
2024-04-20 13:38:58 -07:00
}
array_cleanup(&item->array);
2024-04-20 13:38:58 -07:00
}
array_cleanup(aMetadataIn);
2024-04-20 13:38:58 -07:00
}
sqlite3_free(aMetadataIn);
2024-04-20 13:38:58 -07:00
if (rc != SQLITE_OK) {
sqlite3_free(knn_data);
}
return rc;
2024-04-20 13:38:58 -07:00
}
/**
 * Start a full-scan query on `pCur`.
 *
 * Allocates the cursor's fullscan state, prepares a statement that selects
 * every rowid from the table named by VEC0_SHADOW_ROWIDS_NAME (ordered by
 * chunk_id, chunk_offset), and steps it once so the cursor is positioned on
 * the first row, or flagged as done when there are no rows.
 *
 * @param p    The vec0 virtual table; supplies the db handle and the
 *             schema/table names used to build the SQL.
 * @param pCur The cursor that receives the fullscan state on success.
 * @return SQLITE_OK on success. SQLITE_NOMEM if an allocation fails,
 *         otherwise the result code of the failing prepare/step call. On
 *         failure the cursor is left unmodified and the state is released.
 */
int vec0Filter_fullscan(vec0_vtab *p, vec0_cursor *pCur) {
  struct vec0_query_fullscan_data *scan = sqlite3_malloc(sizeof(*scan));
  if (!scan) {
    return SQLITE_NOMEM;
  }
  memset(scan, 0, sizeof(*scan));

  int status;
  char *sql = sqlite3_mprintf(" SELECT rowid "
                              " FROM " VEC0_SHADOW_ROWIDS_NAME
                              " ORDER by chunk_id, chunk_offset ",
                              p->schemaName, p->tableName);
  if (sql == NULL) {
    status = SQLITE_NOMEM;
    goto fail;
  }

  status = sqlite3_prepare_v2(p->db, sql, -1, &scan->rowids_stmt, NULL);
  // The SQL text is no longer needed once the statement has been compiled.
  sqlite3_free(sql);
  if (status != SQLITE_OK) {
    // IMP: V09901_26739
    vtab_set_error(&p->base, "Error preparing rowid scan: %s",
                   sqlite3_errmsg(p->db));
    goto fail;
  }

  // Position on the first row. SQLITE_ROW (a row is available) and
  // SQLITE_DONE (no rows at all) are both successful outcomes here.
  status = sqlite3_step(scan->rowids_stmt);
  if (status != SQLITE_ROW && status != SQLITE_DONE) {
    goto fail;
  }

  scan->done = status == SQLITE_DONE;
  pCur->query_plan = VEC0_QUERY_PLAN_FULLSCAN;
  pCur->fullscan_data = scan;
  return SQLITE_OK;

fail:
  vec0_query_fullscan_data_clear(scan);
  sqlite3_free(scan);
  return status;
}
/**
 * Start a point query (lookup of a single row by id) on `pCur`.
 *
 * The id to look up is argv[0]. When the table uses a text primary key
 * (p->pkIsText) it is first translated to a rowid via vec0_rowid_from_id();
 * otherwise it is read directly as an integer rowid. The data of every
 * vector column for that rowid is then loaded into the point-query state.
 *
 * A SQLITE_EMPTY result from either lookup is not an error: the cursor is
 * installed with `done = 1`, so it yields no rows.
 *
 * @param pCur The cursor that receives the point-query state.
 * @param p    The vec0 virtual table.
 * @param argc Number of filter arguments; must be exactly 1.
 * @param argv Filter arguments; argv[0] is the id being looked up.
 * @return SQLITE_OK on success (including the "no such row" case),
 *         SQLITE_NOMEM if the state cannot be allocated, otherwise the
 *         error code returned by the failing lookup. On error the cursor is
 *         left unmodified and the state is released.
 */
int vec0Filter_point(vec0_cursor *pCur, vec0_vtab *p, int argc,
                     sqlite3_value **argv) {
  assert(argc == 1);
  int rc;
  int done = 0;
  // Initialized so that the "id not found" path below never stores an
  // indeterminate value into point_data->rowid.
  i64 rowid = 0;

  struct vec0_query_point_data *point_data =
      sqlite3_malloc(sizeof(*point_data));
  if (!point_data) {
    // Nothing has been acquired yet, so there is nothing to clean up.
    return SQLITE_NOMEM;
  }
  memset(point_data, 0, sizeof(*point_data));

  if (p->pkIsText) {
    rc = vec0_rowid_from_id(p, argv[0], &rowid);
    if (rc == SQLITE_EMPTY) {
      done = 1;
      goto finish;
    }
    if (rc != SQLITE_OK) {
      goto error;
    }
  } else {
    rowid = sqlite3_value_int64(argv[0]);
  }

  for (int i = 0; i < p->numVectorColumns; i++) {
    rc = vec0_get_vector_data(p, rowid, i, &point_data->vectors[i], NULL);
    if (rc == SQLITE_EMPTY) {
      done = 1;
      goto finish;
    }
    if (rc != SQLITE_OK) {
      goto error;
    }
  }

finish:
  // Shared tail for both "row found" (done == 0) and "no such row"
  // (done == 1): the cursor takes ownership of point_data either way.
  point_data->rowid = rowid;
  point_data->done = done;
  pCur->point_data = point_data;
  pCur->query_plan = VEC0_QUERY_PLAN_POINT;
  return SQLITE_OK;

error:
  vec0_query_point_data_clear(point_data);
  sqlite3_free(point_data);
  return rc;
}
/**
 * xFilter callback for vec0: resets the cursor and dispatches to the
 * filter routine selected by the query plan encoded in `idxStr`.
 *
 * `idxStr` is validated before use: its first character is the query plan,
 * and the remainder must consist of exactly `argc` entries of 4 characters
 * each (one per value in `argv`).
 *
 * @param pVtabCursor The cursor being (re)initialized.
 * @param idxNum      Index number; only forwarded to the KNN filter.
 * @param idxStr      Encoded query plan string.
 * @param argc        Number of values in `argv`.
 * @param argv        Constraint values.
 * @return The result of the selected filter routine, or SQLITE_ERROR when
 *         `idxStr` is NULL, empty, malformed, inconsistent with `argc`, or
 *         names an unknown query plan.
 */
static int vec0Filter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
                      const char *idxStr, int argc, sqlite3_value **argv) {
  vec0_vtab *p = (vec0_vtab *)pVtabCursor->pVtab;
  vec0_cursor *pCur = (vec0_cursor *)pVtabCursor;
  vec0_cursor_clear(pCur);

  // strlen(NULL) is undefined behavior; treat a missing plan string like
  // any other malformed one.
  if (!idxStr) {
    return SQLITE_ERROR;
  }

  size_t idxStrLength = strlen(idxStr);
  if (idxStrLength == 0) {
    return SQLITE_ERROR;
  }
  // After the leading plan character, every value entry is 4 characters.
  if ((idxStrLength - 1) % 4 != 0) {
    return SQLITE_ERROR;
  }
  size_t numValueEntries = (idxStrLength - 1) / 4;
  if (argc < 0 || numValueEntries != (size_t)argc) {
    return SQLITE_ERROR;
  }

  char query_plan = idxStr[0];
  switch (query_plan) {
  case VEC0_QUERY_PLAN_FULLSCAN:
    return vec0Filter_fullscan(p, pCur);
  case VEC0_QUERY_PLAN_KNN:
    return vec0Filter_knn(pCur, p, idxNum, idxStr, argc, argv);
  case VEC0_QUERY_PLAN_POINT:
    return vec0Filter_point(pCur, p, argc, argv);
  default:
    vtab_set_error(pVtabCursor->pVtab, "unknown idxStr '%s'", idxStr);
    return SQLITE_ERROR;
  }
}
/**
 * xRowid callback for vec0: reports the rowid of the cursor's current row.
 *
 * - FULLSCAN: column 0 of the current row of the rowid scan statement.
 * - POINT:    the rowid stored in the point-query state.
 * - KNN:      rejected with an internal error set on the vtab.
 *
 * @param cur    The cursor to read from.
 * @param pRowid Receives the rowid on success.
 * @return SQLITE_OK on success. SQLITE_ERROR for KNN cursors, for an
 *         unrecognized query plan, or when the state for the cursor's query
 *         plan is missing.
 */
static int vec0Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
  vec0_cursor *pCur = (vec0_cursor *)cur;
  switch (pCur->query_plan) {
  case VEC0_QUERY_PLAN_FULLSCAN: {
    // Same guard as vec0Next/vec0Eof: never dereference missing state.
    if (!pCur->fullscan_data) {
      return SQLITE_ERROR;
    }
    *pRowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);
    return SQLITE_OK;
  }
  case VEC0_QUERY_PLAN_POINT: {
    if (!pCur->point_data) {
      return SQLITE_ERROR;
    }
    *pRowid = pCur->point_data->rowid;
    return SQLITE_OK;
  }
  case VEC0_QUERY_PLAN_KNN: {
    vtab_set_error(cur->pVtab,
                   "Internal sqlite-vec error: expected point query plan in "
                   "vec0Rowid, found %d",
                   pCur->query_plan);
    return SQLITE_ERROR;
  }
  }
  // Unrecognized query plan.
  return SQLITE_ERROR;
}
/**
 * xNext callback for vec0: advances the cursor by one row according to
 * its query plan.
 *
 * - FULLSCAN: steps the rowid scan statement; running out of rows marks the
 *             scan as done.
 * - KNN:      moves to the next entry of the precomputed result set.
 * - POINT:    a point query has at most one row, so advancing finishes it.
 *
 * @param cur The cursor to advance.
 * @return SQLITE_OK on success. SQLITE_ERROR if the state for the cursor's
 *         query plan is missing, if stepping the scan statement fails, or
 *         if the query plan is not recognized.
 */
static int vec0Next(sqlite3_vtab_cursor *cur) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  if (cursor->query_plan == VEC0_QUERY_PLAN_FULLSCAN) {
    if (!cursor->fullscan_data) {
      return SQLITE_ERROR;
    }
    switch (sqlite3_step(cursor->fullscan_data->rowids_stmt)) {
    case SQLITE_DONE:
      cursor->fullscan_data->done = 1;
      return SQLITE_OK;
    case SQLITE_ROW:
      return SQLITE_OK;
    default:
      return SQLITE_ERROR;
    }
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_KNN) {
    if (!cursor->knn_data) {
      return SQLITE_ERROR;
    }
    cursor->knn_data->current_idx++;
    return SQLITE_OK;
  }

  if (cursor->query_plan == VEC0_QUERY_PLAN_POINT) {
    if (!cursor->point_data) {
      return SQLITE_ERROR;
    }
    cursor->point_data->done = 1;
    return SQLITE_OK;
  }

  // Unrecognized query plan.
  return SQLITE_ERROR;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
/**
 * xEof callback for vec0: reports whether the cursor has run out of rows.
 *
 * A cursor whose state for its query plan is missing, or whose query plan
 * is not recognized, is reported as being at EOF.
 *
 * @param cur The cursor to inspect.
 * @return Non-zero when the cursor is past its last row, 0 otherwise.
 */
static int vec0Eof(sqlite3_vtab_cursor *cur) {
  vec0_cursor *cursor = (vec0_cursor *)cur;

  switch (cursor->query_plan) {
  case VEC0_QUERY_PLAN_FULLSCAN:
    return cursor->fullscan_data ? cursor->fullscan_data->done : 1;

  case VEC0_QUERY_PLAN_KNN:
    if (!cursor->knn_data) {
      return 1;
    }
    // The end is decided by k_used, the number of result slots actually
    // filled, rather than by the requested k.
    return cursor->knn_data->current_idx >= cursor->knn_data->k_used;

  case VEC0_QUERY_PLAN_POINT:
    return cursor->point_data ? cursor->point_data->done : 1;
  }

  return 1;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
/**
 * @brief xColumn handler for cursors that are running a full-table scan.
 *
 * The rowid of the current row is read from column 0 of the cursor's
 * rowids statement, then the requested column is resolved against it.
 *
 * @param pVtab: virtual table
 * @param pCur: cursor, must carry a non-NULL fullscan_data
 * @param context: result context that receives the column value (or error)
 * @param i: index of the requested column
 * @return int SQLITE_ERROR when fullscan_data is missing, the error code of
 * the id/vector lookup when one of those fails, SQLITE_OK otherwise. Note
 * that partition/auxiliary/metadata lookup failures are reported through
 * `context` only; the function itself still returns SQLITE_OK for them.
 */
static int vec0Column_fullscan(vec0_vtab *pVtab, vec0_cursor *pCur,
                               sqlite3_context *context, int i) {
  if (pCur->fullscan_data == NULL) {
    sqlite3_result_error(
        context, "Internal sqlite-vec error: fullscan_data is NULL.", -1);
    return SQLITE_ERROR;
  }

  const i64 rowid = sqlite3_column_int64(pCur->fullscan_data->rowids_stmt, 0);

  // Primary key / rowid column.
  if (i == VEC0_COLUMN_ID) {
    return vec0_result_id(pVtab, context, rowid);
  }

  // Vector column: hand the fetched buffer to SQLite, which releases it
  // with sqlite3_free once the result is no longer needed.
  if (vec0_column_idx_is_vector(pVtab, i)) {
    void *vectorData = NULL;
    int vectorSize = 0;
    const int vectorIdx = vec0_column_idx_to_vector_idx(pVtab, i);
    const int rc =
        vec0_get_vector_data(pVtab, rowid, vectorIdx, &vectorData, &vectorSize);
    if (rc != SQLITE_OK) {
      return rc;
    }
    sqlite3_result_blob(context, vectorData, vectorSize, sqlite3_free);
    sqlite3_result_subtype(context,
                           pVtab->vector_columns[vectorIdx].element_type);
    return SQLITE_OK;
  }

  // The distance column always yields NULL on this query plan.
  if (i == vec0_column_distance_idx(pVtab)) {
    sqlite3_result_null(context);
    return SQLITE_OK;
  }

  // Partition key column.
  if (vec0_column_idx_is_partition(pVtab, i)) {
    sqlite3_value *partitionValue = NULL;
    const int partitionIdx = vec0_column_idx_to_partition_idx(pVtab, i);
    const int rc = vec0_get_partition_value_for_rowid(pVtab, rowid,
                                                      partitionIdx,
                                                      &partitionValue);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
      return SQLITE_OK;
    }
    sqlite3_result_value(context, partitionValue);
    sqlite3_value_free(partitionValue);
    return SQLITE_OK;
  }

  // Auxiliary column.
  if (vec0_column_idx_is_auxiliary(pVtab, i)) {
    sqlite3_value *auxValue = NULL;
    const int auxIdx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
    const int rc =
        vec0_get_auxiliary_value_for_rowid(pVtab, rowid, auxIdx, &auxValue);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
      return SQLITE_OK;
    }
    sqlite3_result_value(context, auxValue);
    sqlite3_value_free(auxValue);
    return SQLITE_OK;
  }

  // Metadata column.
  if (vec0_column_idx_is_metadata(pVtab, i)) {
    // Nothing to produce when SQLite reports the column as unchanged.
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    const int metadataIdx = vec0_column_idx_to_metadata_idx(pVtab, i);
    const int rc =
        vec0_result_metadata_value_for_rowid(pVtab, rowid, metadataIdx, context);
    if (rc == SQLITE_OK) {
      return SQLITE_OK;
    }

    // IMP: V15466_32305
    char *message = sqlite3_mprintf(
        "Could not extract metadata value for column %.*s at rowid %lld",
        pVtab->metadata_columns[metadataIdx].name_length,
        pVtab->metadata_columns[metadataIdx].name, rowid);
    if (message == NULL) {
      sqlite3_result_error_nomem(context);
      return SQLITE_OK;
    }
    sqlite3_result_error(context, message, -1);
    sqlite3_free(message);
    return SQLITE_OK;
  }

  // Unrecognised column index: leave the result untouched.
  return SQLITE_OK;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
/**
 * @brief xColumn handler for cursors that are running a point (single rowid)
 * query.
 *
 * The rowid and the vector buffers come from the cursor's point_data.
 * Vector, partition, auxiliary and metadata columns consult
 * sqlite3_vtab_nochange() first and skip the lookup when it returns true.
 *
 * @param pVtab: virtual table
 * @param pCur: cursor, must carry a non-NULL point_data
 * @param context: result context that receives the column value (or error)
 * @param i: index of the requested column
 * @return int SQLITE_ERROR when point_data is missing, the result of
 * vec0_result_id() for the id column, SQLITE_OK otherwise. Partition,
 * auxiliary and metadata lookup failures are reported through `context`
 * only; the function itself still returns SQLITE_OK for them.
 */
static int vec0Column_point(vec0_vtab *pVtab, vec0_cursor *pCur,
                            sqlite3_context *context, int i) {
  if (pCur->point_data == NULL) {
    sqlite3_result_error(context,
                         "Internal sqlite-vec error: point_data is NULL.", -1);
    return SQLITE_ERROR;
  }

  // Primary key / rowid column.
  if (i == VEC0_COLUMN_ID) {
    return vec0_result_id(pVtab, context, pCur->point_data->rowid);
  }

  // The distance column always yields NULL on this query plan.
  if (i == vec0_column_distance_idx(pVtab)) {
    sqlite3_result_null(context);
    return SQLITE_OK;
  }

  // Vector column: served from the buffers held in point_data.
  if (vec0_column_idx_is_vector(pVtab, i)) {
    // An unchanged vector column is reported as NULL.
    if (sqlite3_vtab_nochange(context)) {
      sqlite3_result_null(context);
      return SQLITE_OK;
    }
    const int vectorIdx = vec0_column_idx_to_vector_idx(pVtab, i);
    // SQLITE_TRANSIENT: SQLite takes its own copy of the buffer.
    sqlite3_result_blob(
        context, pCur->point_data->vectors[vectorIdx],
        vector_column_byte_size(pVtab->vector_columns[vectorIdx]),
        SQLITE_TRANSIENT);
    sqlite3_result_subtype(context,
                           pVtab->vector_columns[vectorIdx].element_type);
    return SQLITE_OK;
  }

  // Partition key column.
  if (vec0_column_idx_is_partition(pVtab, i)) {
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    sqlite3_value *partitionValue = NULL;
    const int partitionIdx = vec0_column_idx_to_partition_idx(pVtab, i);
    const i64 targetRowid = pCur->point_data->rowid;
    const int rc = vec0_get_partition_value_for_rowid(pVtab, targetRowid,
                                                      partitionIdx,
                                                      &partitionValue);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
      return SQLITE_OK;
    }
    sqlite3_result_value(context, partitionValue);
    sqlite3_value_free(partitionValue);
    return SQLITE_OK;
  }

  // Auxiliary column.
  if (vec0_column_idx_is_auxiliary(pVtab, i)) {
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    sqlite3_value *auxValue = NULL;
    const i64 targetRowid = pCur->point_data->rowid;
    const int auxIdx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
    const int rc = vec0_get_auxiliary_value_for_rowid(pVtab, targetRowid,
                                                      auxIdx, &auxValue);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
      return SQLITE_OK;
    }
    sqlite3_result_value(context, auxValue);
    sqlite3_value_free(auxValue);
    return SQLITE_OK;
  }

  // Metadata column.
  if (vec0_column_idx_is_metadata(pVtab, i)) {
    if (sqlite3_vtab_nochange(context)) {
      return SQLITE_OK;
    }
    const i64 targetRowid = pCur->point_data->rowid;
    const int metadataIdx = vec0_column_idx_to_metadata_idx(pVtab, i);
    const int rc = vec0_result_metadata_value_for_rowid(pVtab, targetRowid,
                                                        metadataIdx, context);
    if (rc == SQLITE_OK) {
      return SQLITE_OK;
    }

    char *message = sqlite3_mprintf(
        "Could not extract metadata value for column %.*s at rowid %lld",
        pVtab->metadata_columns[metadataIdx].name_length,
        pVtab->metadata_columns[metadataIdx].name, targetRowid);
    if (message == NULL) {
      sqlite3_result_error_nomem(context);
      return SQLITE_OK;
    }
    sqlite3_result_error(context, message, -1);
    sqlite3_free(message);
    return SQLITE_OK;
  }

  // Unrecognised column index: leave the result untouched.
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
/**
 * @brief xColumn handler for cursors that are running a KNN query.
 *
 * The current result row is knn_data->current_idx; its rowid and distance
 * are taken from the knn_data->rowids and knn_data->distances arrays.
 *
 * @param pVtab: virtual table
 * @param pCur: cursor, must carry a non-NULL knn_data
 * @param context: result context that receives the column value (or error)
 * @param i: index of the requested column
 * @return int SQLITE_ERROR when knn_data is missing, the error code of the
 * id/vector lookup when one of those fails, SQLITE_OK otherwise. Partition,
 * auxiliary and metadata lookup failures are reported through `context`
 * only; the function itself still returns SQLITE_OK for them.
 */
static int vec0Column_knn(vec0_vtab *pVtab, vec0_cursor *pCur,
                          sqlite3_context *context, int i) {
  if (pCur->knn_data == NULL) {
    sqlite3_result_error(context,
                         "Internal sqlite-vec error: knn_data is NULL.", -1);
    return SQLITE_ERROR;
  }

  // Primary key / rowid column.
  if (i == VEC0_COLUMN_ID) {
    const i64 currentRowid =
        pCur->knn_data->rowids[pCur->knn_data->current_idx];
    return vec0_result_id(pVtab, context, currentRowid);
  }

  // Distance of the current match.
  if (i == vec0_column_distance_idx(pVtab)) {
    sqlite3_result_double(
        context, pCur->knn_data->distances[pCur->knn_data->current_idx]);
    return SQLITE_OK;
  }

  // Vector column: hand the fetched buffer to SQLite, which releases it
  // with sqlite3_free once the result is no longer needed.
  if (vec0_column_idx_is_vector(pVtab, i)) {
    void *vectorData = NULL;
    int vectorSize = 0;
    const int vectorIdx = vec0_column_idx_to_vector_idx(pVtab, i);
    const int rc = vec0_get_vector_data(
        pVtab, pCur->knn_data->rowids[pCur->knn_data->current_idx], vectorIdx,
        &vectorData, &vectorSize);
    if (rc != SQLITE_OK) {
      return rc;
    }
    sqlite3_result_blob(context, vectorData, vectorSize, sqlite3_free);
    sqlite3_result_subtype(context,
                           pVtab->vector_columns[vectorIdx].element_type);
    return SQLITE_OK;
  }

  // Partition key column.
  if (vec0_column_idx_is_partition(pVtab, i)) {
    sqlite3_value *partitionValue = NULL;
    const int partitionIdx = vec0_column_idx_to_partition_idx(pVtab, i);
    const i64 currentRowid =
        pCur->knn_data->rowids[pCur->knn_data->current_idx];
    const int rc = vec0_get_partition_value_for_rowid(pVtab, currentRowid,
                                                      partitionIdx,
                                                      &partitionValue);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
      return SQLITE_OK;
    }
    sqlite3_result_value(context, partitionValue);
    sqlite3_value_free(partitionValue);
    return SQLITE_OK;
  }

  // Auxiliary column.
  if (vec0_column_idx_is_auxiliary(pVtab, i)) {
    sqlite3_value *auxValue = NULL;
    const int auxIdx = vec0_column_idx_to_auxiliary_idx(pVtab, i);
    const i64 currentRowid =
        pCur->knn_data->rowids[pCur->knn_data->current_idx];
    const int rc = vec0_get_auxiliary_value_for_rowid(pVtab, currentRowid,
                                                      auxIdx, &auxValue);
    if (rc != SQLITE_OK) {
      sqlite3_result_error_code(context, rc);
      return SQLITE_OK;
    }
    sqlite3_result_value(context, auxValue);
    sqlite3_value_free(auxValue);
    return SQLITE_OK;
  }

  // Metadata column.
  if (vec0_column_idx_is_metadata(pVtab, i)) {
    const int metadataIdx = vec0_column_idx_to_metadata_idx(pVtab, i);
    const i64 currentRowid =
        pCur->knn_data->rowids[pCur->knn_data->current_idx];
    const int rc = vec0_result_metadata_value_for_rowid(pVtab, currentRowid,
                                                        metadataIdx, context);
    if (rc == SQLITE_OK) {
      return SQLITE_OK;
    }

    char *message = sqlite3_mprintf(
        "Could not extract metadata value for column %.*s at rowid %lld",
        pVtab->metadata_columns[metadataIdx].name_length,
        pVtab->metadata_columns[metadataIdx].name, currentRowid);
    if (message == NULL) {
      sqlite3_result_error_nomem(context);
      return SQLITE_OK;
    }
    sqlite3_result_error(context, message, -1);
    sqlite3_free(message);
    return SQLITE_OK;
  }

  // Unrecognised column index: leave the result untouched.
  return SQLITE_OK;
}
/**
 * @brief xColumn entry point of the vec0 module: forwards the request to the
 * handler matching the cursor's query plan.
 *
 * @param cur: cursor, cast to the vec0_cursor it is embedded in
 * @param context: result context that receives the column value (or error)
 * @param i: index of the requested column
 * @return int whatever the selected handler returns; SQLITE_OK when the
 * cursor's query plan matches none of the known plans.
 */
static int vec0Column(sqlite3_vtab_cursor *cur, sqlite3_context *context,
                      int i) {
  vec0_cursor *cursor = (vec0_cursor *)cur;
  vec0_vtab *vtab = (vec0_vtab *)cur->pVtab;

  if (cursor->query_plan == VEC0_QUERY_PLAN_FULLSCAN) {
    return vec0Column_fullscan(vtab, cursor, context, i);
  }
  if (cursor->query_plan == VEC0_QUERY_PLAN_KNN) {
    return vec0Column_knn(vtab, cursor, context, i);
  }
  if (cursor->query_plan == VEC0_QUERY_PLAN_POINT) {
    return vec0Column_point(vtab, cursor, context, i);
  }

  // Unknown plan: no result is set.
  return SQLITE_OK;
}
2024-04-20 13:38:58 -07:00
/**
* @brief Handles the "insert rowid" step of a row insert operation of a vec0
* table.
*
* This function will insert a new row into the _rowids vec0 shadow table.
*
* @param p: virtual table
* @param idValue: Value containing the inserted rowid/id value.
* @param rowid: Output rowid, will point to the "real" i64 rowid
* value that was inserted
* @return int SQLITE_OK on success, error code on failure
*/
int vec0Update_InsertRowidStep(vec0_vtab *p, sqlite3_value *idValue,
i64 *rowid) {
2024-04-20 13:38:58 -07:00
/**
* An insert into a vec0 table can happen a few different ways:
* 1) With default INTEGER primary key: With a supplied i64 rowid
* 2) With default INTEGER primary key: WITHOUT a supplied rowid
* 3) With TEXT primary key: supplied text rowid
*/
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int rc;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// Option 3: vtab has a user-defined TEXT primary key, so ensure a text value
// is provided.
if (p->pkIsText) {
if (sqlite3_value_type(idValue) != SQLITE_TEXT) {
// IMP: V04200_21039
vtab_set_error(&p->base,
"The %s virtual table was declared with a TEXT primary "
"key, but a non-TEXT value was provided in an INSERT.",
p->tableName);
return SQLITE_ERROR;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
return vec0_rowids_insert_id(p, idValue, rowid);
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
// Option 1: User supplied a i64 rowid
if (sqlite3_value_type(idValue) == SQLITE_INTEGER) {
i64 suppliedRowid = sqlite3_value_int64(idValue);
rc = vec0_rowids_insert_rowid(p, suppliedRowid);
if (rc == SQLITE_OK) {
*rowid = suppliedRowid;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
return rc;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
// Option 2: User did not supply a rowid
if (sqlite3_value_type(idValue) != SQLITE_NULL) {
// IMP: V30855_14925
2024-07-23 23:57:42 -07:00
vtab_set_error(&p->base,
"Only integers are allows for primary key values on %s",
p->tableName);
return SQLITE_ERROR;
2024-07-23 23:57:28 -07:00
}
// NULL to get next auto-incremented value
return vec0_rowids_insert_id(p, NULL, rowid);
2024-04-20 13:38:58 -07:00
}
/**
 * @brief Determines the "next available" chunk position for a newly inserted
 * vec0 row.
 *
 * The latest chunk (as reported by vec0_get_latest_chunk_rowid()) is scanned
 * for the first unset bit in its validity bitmap. If that chunk is full, or
 * no chunk exists yet (SQLITE_EMPTY), a new "blank" chunk is inserted with
 * vec0_new_chunk() and offset 0 of that chunk is used instead.
 *
 * @param p: virtual table
 * @param partitionKeyValues: array of partition key column values, to constrain
 *  against any partition key columns.
 * @param chunk_rowid: Output rowid of the chunk in the _chunks shadow table
 *  that has the availability.
 * @param chunk_offset: Output index of the available slot inside the chunk,
 *  based on the index of the first unset validity bit.
 * @param blobChunksValidity: Output blob handle on the validity column of the
 *  selected chunk. Opened with read/write permissions.
 * @param bufferChunksValidity: Output buffer holding a copy of the selected
 *  chunk's validity column. Needs to be cleaned up with sqlite3_free().
 * @return int SQLITE_OK on success, error code on failure. On failure the two
 *  output handles are NOT released here and may still reference an open blob
 *  and/or an allocated buffer.
 *
 * NOTE(review): in the SQLITE_EMPTY path the output blob/buffer are
 * closed/freed without having been assigned by this function; this assumes
 * callers initialize both to NULL before calling - confirm against callers.
 */
int vec0Update_InsertNextAvailableStep(
    vec0_vtab *p,
    sqlite3_value ** partitionKeyValues,
    i64 *chunk_rowid, i64 *chunk_offset,
    sqlite3_blob **blobChunksValidity,
    const unsigned char **bufferChunksValidity) {

  int rc;
  i64 validitySize;
  // -1 means "no free slot found (yet)"
  *chunk_offset = -1;

  rc = vec0_get_latest_chunk_rowid(p, chunk_rowid, partitionKeyValues);
  if (rc == SQLITE_EMPTY) {
    // no chunk to inspect: skip straight to creating a new one
    goto done;
  }
  if (rc != SQLITE_OK) {
    goto cleanup;
  }

  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
                         *chunk_rowid, 1, blobChunksValidity);
  if (rc != SQLITE_OK) {
    // IMP: V22053_06123
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "could not open validity blob on %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, *chunk_rowid);
    goto cleanup;
  }

  validitySize = sqlite3_blob_bytes(*blobChunksValidity);
  if (validitySize != p->chunk_size / CHAR_BIT) {
    // IMP: V29362_13432
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "validity blob size mismatch on "
                   "%s.%s.%lld, expected %lld but received %lld.",
                   p->schemaName, p->shadowChunksName, *chunk_rowid,
                   (i64)(p->chunk_size / CHAR_BIT), validitySize);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  *bufferChunksValidity = sqlite3_malloc(validitySize);
  if (!(*bufferChunksValidity)) {
    vtab_set_error(&p->base, VEC_INTERAL_ERROR
                   "Could not allocate memory for validity bitmap");
    rc = SQLITE_NOMEM;
    goto cleanup;
  }

  rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
                         validitySize, 0);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "Could not read validity bitmap for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, *chunk_rowid);
    goto cleanup;
  }

  // find the next available offset, ie first `0` in the bitmap.
  for (i64 i = 0; i < validitySize; i++) {
    // fast path: every slot covered by this byte is taken
    if ((*bufferChunksValidity)[i] == 0xFF) {
      continue;
    }
    for (int j = 0; j < CHAR_BIT; j++) {
      if ((((*bufferChunksValidity)[i] >> j) & 1) == 0) {
        *chunk_offset = (i * CHAR_BIT) + j;
        goto done;
      }
    }
  }

done:
  // latest chunk was full (or missing), so need to create a new one
  if (*chunk_offset == -1) {
    rc = vec0_new_chunk(p, partitionKeyValues, chunk_rowid);
    if (rc != SQLITE_OK) {
      // IMP: V08441_25279
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR "Could not insert a new vector chunk");
      rc = SQLITE_ERROR; // otherwise raises a DatabaseError and not operational
                         // error?
      goto cleanup;
    }
    *chunk_offset = 0;

    // blobChunksValidity and bufferChunksValidity are stale, pointing to the
    // previous (full) chunk. Release them, then re-assign them below.
    rc = sqlite3_blob_close(*blobChunksValidity);
    sqlite3_free((void *)*bufferChunksValidity);
    *blobChunksValidity = NULL;
    *bufferChunksValidity = NULL;
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "unknown error, blobChunksValidity could not be closed, "
                     "please file an issue.");
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName,
                           "validity", *chunk_rowid, 1, blobChunksValidity);
    if (rc != SQLITE_OK) {
      vtab_set_error(
          &p->base,
          VEC_INTERAL_ERROR
          "Could not open validity blob for newly created chunk %s.%s.%lld",
          p->schemaName, p->shadowChunksName, *chunk_rowid);
      goto cleanup;
    }

    validitySize = sqlite3_blob_bytes(*blobChunksValidity);
    if (validitySize != p->chunk_size / CHAR_BIT) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "validity blob size mismatch for newly created chunk "
                     "%s.%s.%lld. Exepcted %lld, got %lld",
                     p->schemaName, p->shadowChunksName, *chunk_rowid,
                     (i64)(p->chunk_size / CHAR_BIT), validitySize);
      // rc is still SQLITE_OK from the successful open above; without this
      // the mismatch would be reported to the caller as success.
      rc = SQLITE_ERROR;
      goto cleanup;
    }

    *bufferChunksValidity = sqlite3_malloc(validitySize);
    if (!(*bufferChunksValidity)) {
      vtab_set_error(&p->base, VEC_INTERAL_ERROR
                     "Could not allocate memory for validity bitmap");
      rc = SQLITE_NOMEM;
      goto cleanup;
    }

    rc = sqlite3_blob_read(*blobChunksValidity, (void *)*bufferChunksValidity,
                           validitySize, 0);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "could not read validity blob newly created chunk "
                     "%s.%s.%lld",
                     p->schemaName, p->shadowChunksName, *chunk_rowid);
      goto cleanup;
    }
  }

  rc = SQLITE_OK;

cleanup:
  return rc;
}
/**
 * @brief Write the vector data into the provided vector blob at the given
 * offset
 *
 * The byte length of one vector and the byte position of slot `chunk_offset`
 * are derived from `dimensions` and `element_type`, then passed to
 * sqlite3_blob_write().
 *
 * @param blobVectors SQLite BLOB to write to
 * @param chunk_offset the "offset" (ie validity bitmap position) to write the
 * vector to
 * @param bVector pointer to the vector containing data
 * @param dimensions how many dimensions the vector has
 * @param element_type the vector type
 * @return result of sqlite3_blob_write, SQLITE_OK on success, otherwise
 * failure. SQLITE_ERROR is returned without writing when `element_type` is
 * not one of the handled types, or when the computed length/offset does not
 * fit the `int` arguments of sqlite3_blob_write().
 */
static int
vec0_write_vector_to_vector_blob(sqlite3_blob *blobVectors, i64 chunk_offset,
                                 const void *bVector, size_t dimensions,
                                 enum VectorElementType element_type) {
  // computed in 64 bits so an oversized value is detected instead of being
  // silently truncated when narrowed to int
  i64 n;
  i64 offset;

  switch (element_type) {
  case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
    n = (i64)dimensions * (i64)sizeof(f32);
    offset = chunk_offset * (i64)dimensions * (i64)sizeof(f32);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_INT8:
    n = (i64)dimensions * (i64)sizeof(i8);
    offset = chunk_offset * (i64)dimensions * (i64)sizeof(i8);
    break;
  case SQLITE_VEC_ELEMENT_TYPE_BIT:
    n = (i64)dimensions / CHAR_BIT;
    offset = chunk_offset * (i64)dimensions / CHAR_BIT;
    break;
  default:
    // unknown element type: there is no sensible size to write
    return SQLITE_ERROR;
  }

  if (n < 0 || n > INT_MAX || offset < 0 || offset > INT_MAX) {
    return SQLITE_ERROR;
  }

  return sqlite3_blob_write(blobVectors, bVector, (int)n, (int)offset);
}
/**
 * @brief Writes a newly inserted row into its assigned chunk slot.
 *
 * In order: sets the row's bit in the chunk's validity bitmap, writes each
 * FLAT-indexed vector column into its vector-chunks shadow table, stores the
 * rowid in the chunk's "rowids" blob, and finally records the
 * chunk_rowid/chunk_offset position through vec0_rowids_update_position().
 *
 * @param p vec0 virtual table
 * @param chunk_rowid: which chunk to write to
 * @param chunk_offset: the offset inside the chunk to write the vector to.
 * @param rowid: the rowid of the inserting row
 * @param vectorDatas: array of the vector data to insert, indexed by vector
 *  column. Only entries of FLAT-indexed columns are read here.
 * @param blobChunksValidity: writeable validity blob of the row's assigned
 *  chunk.
 * @param bufferChunksValidity: snapshot buffer of the validity column from
 *  the row's assigned chunk.
 * @return int SQLITE_OK on success, error code on failure
 */
int vec0Update_InsertWriteFinalStep(vec0_vtab *p, i64 chunk_rowid,
                                    i64 chunk_offset, i64 rowid,
                                    void *vectorDatas[],
                                    sqlite3_blob *blobChunksValidity,
                                    const unsigned char *bufferChunksValidity) {
  int rc;
  int closeRc;
  i64 expectedBytes;
  i64 actualBytes;
  sqlite3_blob *blobChunksRowids = NULL;

  // Step 1: flip this row's bit in the validity bitmap. Only the single byte
  // that holds the bit is rewritten, starting from the snapshot's value.
  const i64 validityByteIdx = chunk_offset / CHAR_BIT;
  unsigned char validityByte = bufferChunksValidity[validityByteIdx];
  validityByte |= (1 << (chunk_offset % CHAR_BIT));

  rc = sqlite3_blob_write(blobChunksValidity, &validityByte, 1,
                          validityByteIdx);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, VEC_INTERAL_ERROR "could not mark validity bit ");
    return rc;
  }

  // Step 2: copy every vector into its vector chunk shadow table
  for (int col = 0; col < p->numVectorColumns; col++) {
    sqlite3_blob *blobVectors;

    // Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
    if (p->vector_columns[col].index_type != VEC0_INDEX_TYPE_FLAT) {
      continue;
    }

    rc = sqlite3_blob_open(p->db, p->schemaName,
                           p->shadowVectorChunksNames[col], "vectors",
                           chunk_rowid, 1, &blobVectors);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, "Error opening vector blob at %s.%s.%lld",
                     p->schemaName, p->shadowVectorChunksNames[col],
                     chunk_rowid);
      goto cleanup;
    }

    expectedBytes =
        p->chunk_size * vector_column_byte_size(p->vector_columns[col]);
    actualBytes = sqlite3_blob_bytes(blobVectors);
    if (expectedBytes != actualBytes) {
      // IMP: V16386_00456
      vtab_set_error(
          &p->base,
          VEC_INTERAL_ERROR
          "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
          p->schemaName, p->shadowVectorChunksNames[col], chunk_rowid,
          expectedBytes, actualBytes);
      rc = SQLITE_ERROR;
      // the error is already decided, so the close result is ignored
      sqlite3_blob_close(blobVectors);
      goto cleanup;
    }

    rc = vec0_write_vector_to_vector_blob(
        blobVectors, chunk_offset, vectorDatas[col],
        p->vector_columns[col].dimensions,
        p->vector_columns[col].element_type);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "could not write vector blob on %s.%s.%lld",
                     p->schemaName, p->shadowVectorChunksNames[col],
                     chunk_rowid);
      rc = SQLITE_ERROR;
      // the error is already decided, so the close result is ignored
      sqlite3_blob_close(blobVectors);
      goto cleanup;
    }

    rc = sqlite3_blob_close(blobVectors);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "could not close vector blob on %s.%s.%lld",
                     p->schemaName, p->shadowVectorChunksNames[col],
                     chunk_rowid);
      rc = SQLITE_ERROR;
      goto cleanup;
    }
  }

  // Step 3: store the new rowid in the rowids column of the _chunks table
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
                         chunk_rowid, 1, &blobChunksRowids);
  if (rc != SQLITE_OK) {
    // IMP: V09221_26060
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR "could not open rowids blob on %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_rowid);
    goto cleanup;
  }

  expectedBytes = p->chunk_size * sizeof(i64);
  actualBytes = sqlite3_blob_bytes(blobChunksRowids);
  if (expectedBytes != actualBytes) {
    // IMP: V12779_29618
    vtab_set_error(
        &p->base,
        VEC_INTERAL_ERROR
        "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
        p->schemaName, p->shadowChunksName, chunk_rowid, expectedBytes,
        actualBytes);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  rc = sqlite3_blob_write(blobChunksRowids, &rowid, sizeof(i64),
                          chunk_offset * sizeof(i64));
  if (rc != SQLITE_OK) {
    vtab_set_error(
        &p->base, VEC_INTERAL_ERROR "could not write rowids blob on %s.%s.%lld",
        p->schemaName, p->shadowChunksName, chunk_rowid);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  // Step 4: with all the vectors inserted, go back and update the _rowids
  // table with the new chunk_rowid/chunk_offset values
  rc = vec0_rowids_update_position(p, rowid, chunk_rowid, chunk_offset);

cleanup:
  // blobChunksRowids is still NULL when an earlier step bailed out before the
  // rowids blob was opened; the close call is made unconditionally anyway.
  closeRc = sqlite3_blob_close(blobChunksRowids);
  if (rc != SQLITE_OK || closeRc == SQLITE_OK) {
    // an earlier failure takes precedence over a close failure
    return rc;
  }
  vtab_set_error(
      &p->base, VEC_INTERAL_ERROR "could not close rowids blob on %s.%s.%lld",
      p->schemaName, p->shadowChunksName, chunk_rowid);
  return closeRc;
}
int vec0_write_metadata_value(vec0_vtab *p, int metadata_column_idx, i64 rowid, i64 chunk_id, i64 chunk_offset, sqlite3_value * v, int isupdate) {
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
int rc;
struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx];
vec0_metadata_column_kind kind = metadata_column->kind;
// verify input value matches column type
switch(kind) {
case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
if(sqlite3_value_type(v) != SQLITE_INTEGER || ((sqlite3_value_int(v) != 0) && (sqlite3_value_int(v) != 1))) {
rc = SQLITE_ERROR;
vtab_set_error(&p->base, "Expected 0 or 1 for BOOLEAN metadata column %.*s", metadata_column->name_length, metadata_column->name);
goto done;
}
break;
}
case VEC0_METADATA_COLUMN_KIND_INTEGER: {
if(sqlite3_value_type(v) != SQLITE_INTEGER) {
rc = SQLITE_ERROR;
vtab_set_error(&p->base, "Expected integer for INTEGER metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
goto done;
}
break;
}
case VEC0_METADATA_COLUMN_KIND_FLOAT: {
if(sqlite3_value_type(v) != SQLITE_FLOAT) {
rc = SQLITE_ERROR;
vtab_set_error(&p->base, "Expected float for FLOAT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
goto done;
}
break;
}
case VEC0_METADATA_COLUMN_KIND_TEXT: {
if(sqlite3_value_type(v) != SQLITE_TEXT) {
rc = SQLITE_ERROR;
vtab_set_error(&p->base, "Expected text for TEXT metadata column %.*s, received %s", metadata_column->name_length, metadata_column->name, type_name(sqlite3_value_type(v)));
goto done;
}
break;
}
}
sqlite3_blob * blobValue = NULL;
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", chunk_id, 1, &blobValue);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(rc != SQLITE_OK) {
goto done;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
switch(kind) {
case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
u8 block;
int value = sqlite3_value_int(v);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT));
if(rc != SQLITE_OK) {
goto done;
}
if (value) {
block |= 1 << (chunk_offset % CHAR_BIT);
} else {
block &= ~(1 << (chunk_offset % CHAR_BIT));
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT);
break;
}
case VEC0_METADATA_COLUMN_KIND_INTEGER: {
i64 value = sqlite3_value_int64(v);
rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
break;
}
case VEC0_METADATA_COLUMN_KIND_FLOAT: {
double value = sqlite3_value_double(v);
rc = sqlite3_blob_write(blobValue, &value, sizeof(value), chunk_offset * sizeof(double));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
break;
}
case VEC0_METADATA_COLUMN_KIND_TEXT: {
int prev_n;
rc = sqlite3_blob_read(blobValue, &prev_n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(rc != SQLITE_OK) {
goto done;
}
const char * s = (const char *) sqlite3_value_text(v);
int n = sqlite3_value_bytes(v);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
memcpy(view, &n, sizeof(int));
memcpy(view+4, s, min(n, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-4));
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = sqlite3_blob_write(blobValue, &view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
const char * zSql;
if(isupdate && (prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH)) {
zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " SET data = ?2 WHERE rowid = ?1", p->schemaName, p->tableName, metadata_column_idx);
}else {
zSql = sqlite3_mprintf("INSERT INTO " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " (rowid, data) VALUES (?1, ?2)", p->schemaName, p->tableName, metadata_column_idx);
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(!zSql) {
rc = SQLITE_NOMEM;
goto done;
}
sqlite3_stmt * stmt;
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
if(rc != SQLITE_OK) {
goto done;
}
sqlite3_bind_int64(stmt, 1, rowid);
sqlite3_bind_text(stmt, 2, s, n, SQLITE_STATIC);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
if(rc != SQLITE_DONE) {
rc = SQLITE_ERROR;
goto done;
}
}
else if(prev_n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
const char * zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_column_idx);
if(!zSql) {
rc = SQLITE_NOMEM;
goto done;
}
sqlite3_stmt * stmt;
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
if(rc != SQLITE_OK) {
goto done;
}
sqlite3_bind_int64(stmt, 1, rowid);
rc = sqlite3_step(stmt);
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
sqlite3_finalize(stmt);
if(rc != SQLITE_DONE) {
rc = SQLITE_ERROR;
goto done;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
break;
}
}
if(rc != SQLITE_OK) {
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
rc = sqlite3_blob_close(blobValue);
if(rc != SQLITE_OK) {
goto done;
}
done:
return rc;
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
/**
* @brief Handles INSERT INTO operations on a vec0 table.
*
* @return int SQLITE_OK on success, otherwise error code on failure
*/
int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
sqlite_int64 *pRowid) {
UNUSED_PARAMETER(argc);
vec0_vtab *p = (vec0_vtab *)pVTab;
int rc;
// Rowid for the inserted row, determined by the inserted ID + _rowids shadow
// table
i64 rowid;
// Array to hold the vector data of the inserted row. Individual elements will
// have a lifetime bound to the argv[..] values.
void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS];
// Array to hold cleanup functions for vectorDatas[]
vector_cleanup cleanups[VEC0_MAX_VECTOR_COLUMNS];
sqlite3_value * partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS];
// Rowid of the chunk in the _chunks shadow table that the row will be a part
// of.
i64 chunk_rowid;
// offset within the chunk where the rowid belongs
i64 chunk_offset;
// a write-able blob of the validity column for the given chunk. Used to mark
// validity bit
sqlite3_blob *blobChunksValidity = NULL;
// buffer for the validity column for the given chunk. Maybe not needed here?
const unsigned char *bufferChunksValidity = NULL;
int numReadVectors = 0;
// Read all provided partition key values into partitionKeyValues
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
continue;
}
int partition_key_idx = p->user_column_idxs[i];
partitionKeyValues[partition_key_idx] = argv[2+VEC0_COLUMN_USERN_START + i];
int new_value_type = sqlite3_value_type(partitionKeyValues[partition_key_idx]);
if((new_value_type != SQLITE_NULL) && (new_value_type != p->paritition_columns[partition_key_idx].type)) {
// IMP: V11454_28292
vtab_set_error(
pVTab,
"Parition key type mismatch: The partition key column %.*s has type %s, but %s was provided.",
p->paritition_columns[partition_key_idx].name_length,
p->paritition_columns[partition_key_idx].name,
type_name(p->paritition_columns[partition_key_idx].type),
type_name(new_value_type)
);
rc = SQLITE_ERROR;
goto cleanup;
}
}
// read all the inserted vectors into vectorDatas, validate their lengths.
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
continue;
}
int vector_column_idx = p->user_column_idxs[i];
sqlite3_value *valueVector = argv[2 + VEC0_COLUMN_USERN_START + i];
size_t dimensions;
char *pzError;
enum VectorElementType elementType;
rc = vector_from_value(valueVector, &vectorDatas[vector_column_idx], &dimensions,
&elementType, &cleanups[vector_column_idx], &pzError);
if (rc != SQLITE_OK) {
// IMP: V06519_23358
vtab_set_error(
pVTab, "Inserted vector for the \"%.*s\" column is invalid: %z",
p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name, pzError);
rc = SQLITE_ERROR;
goto cleanup;
}
numReadVectors++;
if (elementType != p->vector_columns[vector_column_idx].element_type) {
// IMP: V08221_25059
vtab_set_error(
pVTab,
"Inserted vector for the \"%.*s\" column is expected to be of type "
"%s, but a %s vector was provided.",
p->vector_columns[i].name_length, p->vector_columns[i].name,
vector_subtype_name(p->vector_columns[i].element_type),
vector_subtype_name(elementType));
rc = SQLITE_ERROR;
goto cleanup;
}
if (dimensions != p->vector_columns[vector_column_idx].dimensions) {
// IMP: V01145_17984
vtab_set_error(
pVTab,
"Dimension mismatch for inserted vector for the \"%.*s\" column. "
"Expected %d dimensions but received %d.",
p->vector_columns[vector_column_idx].name_length, p->vector_columns[vector_column_idx].name,
p->vector_columns[vector_column_idx].dimensions, dimensions);
rc = SQLITE_ERROR;
goto cleanup;
}
}
// Cannot insert a value in the hidden "distance" column
if (sqlite3_value_type(argv[2 + vec0_column_distance_idx(p)]) !=
SQLITE_NULL) {
// IMP: V24228_08298
vtab_set_error(pVTab,
"A value was provided for the hidden \"distance\" column.");
rc = SQLITE_ERROR;
goto cleanup;
}
// Cannot insert a value in the hidden "k" column
if (sqlite3_value_type(argv[2 + vec0_column_k_idx(p)]) != SQLITE_NULL) {
// IMP: V11875_28713
vtab_set_error(pVTab, "A value was provided for the hidden \"k\" column.");
rc = SQLITE_ERROR;
goto cleanup;
}
// Step #1: Insert/get a rowid for this row, from the _rowids table.
rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID], &rowid);
if (rc != SQLITE_OK) {
goto cleanup;
}
if (!vec0_all_columns_diskann(p)) {
// Step #2: Find the next "available" position in the _chunks table for this
// row.
rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues,
&chunk_rowid, &chunk_offset,
&blobChunksValidity,
&bufferChunksValidity);
if (rc != SQLITE_OK) {
goto cleanup;
}
// Step #3: With the next available chunk position, write out all the vectors
// to their specified location.
rc = vec0Update_InsertWriteFinalStep(p, chunk_rowid, chunk_offset, rowid,
vectorDatas, blobChunksValidity,
bufferChunksValidity);
if (rc != SQLITE_OK) {
goto cleanup;
}
}
#if SQLITE_VEC_ENABLE_DISKANN
// Step #4: Insert into DiskANN graph for indexed vector columns
for (int i = 0; i < p->numVectorColumns; i++) {
if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_DISKANN) continue;
rc = diskann_insert(p, i, rowid, vectorDatas[i]);
if (rc != SQLITE_OK) {
goto cleanup;
}
}
#endif
#if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_on_insert(p, chunk_rowid, chunk_offset, rowid, vectorDatas);
if (rc != SQLITE_OK) {
goto cleanup;
}
#endif
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
// Step #4: IVF index insert (if any vector column uses IVF)
for (int i = 0; i < p->numVectorColumns; i++) {
if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_IVF) continue;
int vecSize = (int)vector_column_byte_size(p->vector_columns[i]);
rc = ivf_insert(p, i, rowid, vectorDatas[i], vecSize);
if (rc != SQLITE_OK) {
goto cleanup;
}
}
#endif
if(p->numAuxiliaryColumns > 0) {
sqlite3_stmt *stmt;
sqlite3_str * s = sqlite3_str_new(NULL);
sqlite3_str_appendf(s, "INSERT INTO " VEC0_SHADOW_AUXILIARY_NAME "(rowid ", p->schemaName, p->tableName);
for(int i = 0; i < p->numAuxiliaryColumns; i++) {
sqlite3_str_appendf(s, ", value%02d", i);
}
sqlite3_str_appendall(s, ") VALUES (? ");
for(int i = 0; i < p->numAuxiliaryColumns; i++) {
sqlite3_str_appendall(s, ", ?");
}
sqlite3_str_appendall(s, ")");
char * zSql = sqlite3_str_finish(s);
// TODO double check error handling ehre
if(!zSql) {
rc = SQLITE_NOMEM;
goto cleanup;
}
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
if(rc != SQLITE_OK) {
goto cleanup;
}
sqlite3_bind_int64(stmt, 1, rowid);
for (int i = 0; i < vec0_num_defined_user_columns(p); i++) {
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
continue;
}
int auxiliary_key_idx = p->user_column_idxs[i];
sqlite3_value * v = argv[2+VEC0_COLUMN_USERN_START + i];
int v_type = sqlite3_value_type(v);
if(v_type != SQLITE_NULL && (v_type != p->auxiliary_columns[auxiliary_key_idx].type)) {
sqlite3_finalize(stmt);
rc = SQLITE_CONSTRAINT;
vtab_set_error(
pVTab,
"Auxiliary column type mismatch: The auxiliary column %.*s has type %s, but %s was provided.",
p->auxiliary_columns[auxiliary_key_idx].name_length,
p->auxiliary_columns[auxiliary_key_idx].name,
type_name(p->auxiliary_columns[auxiliary_key_idx].type),
type_name(v_type)
);
goto cleanup;
}
// first 1 is for 1-based indexing on sqlite3_bind_*, second 1 is to account for initial rowid parameter
sqlite3_bind_value(stmt, 1 + 1 + auxiliary_key_idx, v);
}
rc = sqlite3_step(stmt);
if(rc != SQLITE_DONE) {
sqlite3_finalize(stmt);
rc = SQLITE_ERROR;
goto cleanup;
}
sqlite3_finalize(stmt);
}
for(int i = 0; i < vec0_num_defined_user_columns(p); i++) {
if(p->user_column_kinds[i] != SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
continue;
}
int metadata_idx = p->user_column_idxs[i];
sqlite3_value *v = argv[2 + VEC0_COLUMN_USERN_START + i];
rc = vec0_write_metadata_value(p, metadata_idx, rowid, chunk_rowid, chunk_offset, v, 0);
if(rc != SQLITE_OK) {
goto cleanup;
}
Metadata filtering (#124) * initial pass at PARTITION KEY support. * Initial pass, allow auxiliary columns on vec0 virtual tables * update TODO * Initial pass at metadata filtering * unit tests * gha this PR branch * fixup tests * doc internal * fix tests, KNN/rowids in * define SQLITE_INDEX_CONSTRAINT_OFFSET * whoops * update tests, syrupy, use uv * un ignore pyproject.toml * dot * tests/ * type error? * win: .exe, update error name * try fix macos python, paren around expr? * win bash? * dbg :( * explicit error * op * dbg win * win ./tests/.venv/Scripts/python.exe * block UPDATEs on partition key values for now * test this branch * accidentally removved "partition key type mistmatch" block during merge * typo ugh * bruv * start aux snapshots * drop aux shadow table on destroy * enforce column types * block WHERE constraints on auxiliary columns in KNN queries * support delete * support UPDATE on auxiliary columns * test this PR * dont inline that * test-metadata.py * memzero text buffer * stress test * more snpashot tests * rm double/int32, just float/int64 * finish type checking * long text support * DELETE support * UPDATE support * fix snapshot names * drop not-used in eqp * small fixes * boolean comparison handling * ensure error is raised when long string constraint * new version string for beta builds * typo whoops * ann-filtering-benchmark directory * test-case * updates * fix aux column error when using non-default rowid values, needs test * refactor some text knn filtering * rowids blob read only on text metadata filters * refactor * add failing test causes for non eq text knn * text knn NE * test cases diff * GT * text knn GT/GE fixes * text knn LT/LE * clean * vtab_in handling * unblock aux failures for now * guard sqlite3_vtab_in * else in guard? 
* fixes and tests * add broken shadow table test * rename _metadata_chunksNN shadown table to _metadatachunksNN, for proper shadowName detection * _metadata_text_NN shadow tables to _metadatatextNN * SQLITE_VEC_VERSION_MAJOR SQLITE_VEC_VERSION_MINOR and SQLITE_VEC_VERSION_PATCH in sqlite-vec.h * _info shadow table * forgot to update aux snapshot? * fix aux tests
2024-11-20 00:59:34 -08:00
}
*pRowid = rowid;
rc = SQLITE_OK;
cleanup:
for (int i = 0; i < numReadVectors; i++) {
cleanups[i](vectorDatas[i]);
}
sqlite3_free((void *)bufferChunksValidity);
int brc = sqlite3_blob_close(blobChunksValidity);
if ((rc == SQLITE_OK) && (brc != SQLITE_OK)) {
vtab_set_error(&p->base,
VEC_INTERAL_ERROR "unknown error, blobChunksValidity could "
"not be closed, please file an issue");
return brc;
}
return rc;
}
/**
 * Clears the validity bit of one chunk slot in the shadow chunks table.
 *
 * Opens the "validity" blob of row `chunk_id` for writing, reads the byte that
 * holds the bit for `chunk_offset`, checks that this bit is currently set, and
 * writes the byte back with the bit cleared.
 *
 * @param p            The vec0 virtual table.
 * @param chunk_id     Rowid of the row in the shadow chunks table.
 * @param chunk_offset Slot index inside the chunk. Bit
 *                     `chunk_offset % CHAR_BIT` of byte
 *                     `chunk_offset / CHAR_BIT` is the one that is cleared.
 * @return SQLITE_OK on success. SQLITE_ERROR if the blob could not be opened
 *         or the bit was not set. Otherwise the error code of the failing blob
 *         read, write or close. An error message is stored on the vtab for
 *         every failure.
 */
int vec0Update_Delete_ClearValidity(vec0_vtab *p, i64 chunk_id,
                                    u64 chunk_offset) {
  int rc, brc;
  sqlite3_blob *blobChunksValidity = NULL;
  char unsigned bx;
  // Byte of the bitmap that contains this slot's bit, and the position of the
  // bit inside that byte.
  int validityOffset = (int)(chunk_offset / CHAR_BIT);
  unsigned int validityBit = (unsigned int)(chunk_offset % CHAR_BIT);

  // 2. ensure chunks.validity bit is 1, then set to 0
  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
                         chunk_id, 1, &blobChunksValidity);
  if (rc != SQLITE_OK) {
    // IMP: V26002_10073
    vtab_set_error(&p->base, "could not open validity blob for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return SQLITE_ERROR;
  }

  // will skip the sqlite3_blob_bytes(blobChunksValidity) check for now,
  // the read below would catch it
  rc = sqlite3_blob_read(blobChunksValidity, &bx, sizeof(bx), validityOffset);
  if (rc != SQLITE_OK) {
    // IMP: V21193_05263
    vtab_set_error(
        &p->base, "could not read validity blob for %s.%s.%lld at %d",
        p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
    goto cleanup;
  }

  // Test only the bit that belongs to this slot. Testing `bx >> bit` for
  // non-zero would also pass whenever any higher bit of the byte is set.
  if (!((bx >> validityBit) & 1u)) {
    // IMP: V21193_05263
    rc = SQLITE_ERROR;
    vtab_set_error(
        &p->base,
        "vec0 deletion error: validity bit is not set for %s.%s.%lld at %d",
        p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
    goto cleanup;
  }

  char unsigned mask = (char unsigned)~(1u << validityBit);
  char unsigned result = bx & mask;
  rc = sqlite3_blob_write(blobChunksValidity, &result, sizeof(result),
                          validityOffset);
  if (rc != SQLITE_OK) {
    vtab_set_error(
        &p->base, "could not write to validity blob for %s.%s.%lld at %d",
        p->schemaName, p->shadowChunksName, chunk_id, validityOffset);
    goto cleanup;
  }

cleanup:
  // The blob is always closed; an earlier error takes precedence over a
  // close error.
  brc = sqlite3_blob_close(blobChunksValidity);
  if (rc != SQLITE_OK)
    return rc;
  if (brc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "vec0 deletion error: Error commiting validity blob "
                   "transaction on %s.%s.%lld at %d",
                   p->schemaName, p->shadowChunksName, chunk_id,
                   validityOffset);
    return brc;
  }
  return SQLITE_OK;
}
/**
 * Overwrites one slot of the "rowids" blob of a chunk with zero.
 *
 * The slot written is the i64 at byte offset `chunk_offset * sizeof(i64)` in
 * the "rowids" blob of row `chunk_id` of the shadow chunks table.
 *
 * @return SQLITE_OK on success, SQLITE_ERROR if the blob cannot be opened,
 *         otherwise the error code of the failing write or close. An error
 *         message is stored on the vtab for every failure.
 */
int vec0Update_Delete_ClearRowid(vec0_vtab *p, i64 chunk_id,
                                 u64 chunk_offset) {
  sqlite3_blob *rowidsBlob = NULL;
  i64 emptyRowid = 0;

  int openRc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName,
                                 "rowids", chunk_id, 1, &rowidsBlob);
  if (openRc != SQLITE_OK) {
    vtab_set_error(&p->base, "could not open rowids blob for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return SQLITE_ERROR;
  }

  int writeRc = sqlite3_blob_write(rowidsBlob, &emptyRowid, sizeof(emptyRowid),
                                   chunk_offset * sizeof(i64));
  if (writeRc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "could not write to rowids blob for %s.%s.%lld at %llu",
                   p->schemaName, p->shadowChunksName, chunk_id, chunk_offset);
  }

  // Close unconditionally; a failed write is reported in preference to a
  // failed close.
  int closeRc = sqlite3_blob_close(rowidsBlob);
  if (writeRc != SQLITE_OK) {
    return writeRc;
  }
  if (closeRc == SQLITE_OK) {
    return SQLITE_OK;
  }

  vtab_set_error(&p->base,
                 "vec0 deletion error: Error commiting rowids blob "
                 "transaction on %s.%s.%lld at %llu",
                 p->schemaName, p->shadowChunksName, chunk_id, chunk_offset);
  return closeRc;
}
2024-04-20 13:38:58 -07:00
/**
 * Zeroes the stored vector of one chunk slot in every FLAT vector column.
 *
 * For each vector column whose index type is VEC0_INDEX_TYPE_FLAT, a buffer
 * of `vector_column_byte_size()` zero bytes is written into the "vectors" blob
 * of row `chunk_id`, at byte offset `chunk_offset * <vector byte size>`.
 *
 * @return SQLITE_OK on success, SQLITE_ERROR if a blob cannot be opened,
 *         SQLITE_NOMEM if the zero buffer cannot be allocated, otherwise the
 *         error code of the failing write or close. Processing stops at the
 *         first column that fails.
 */
int vec0Update_Delete_ClearVectors(vec0_vtab *p, i64 chunk_id,
                                   u64 chunk_offset) {
  for (int col = 0; col < p->numVectorColumns; col++) {
    // Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
    if (p->vector_columns[col].index_type != VEC0_INDEX_TYPE_FLAT) {
      continue;
    }

    sqlite3_blob *vectorsBlob = NULL;
    size_t vectorBytes = vector_column_byte_size(p->vector_columns[col]);

    if (sqlite3_blob_open(p->db, p->schemaName,
                          p->shadowVectorChunksNames[col], "vectors", chunk_id,
                          1, &vectorsBlob) != SQLITE_OK) {
      vtab_set_error(&p->base,
                     "could not open vector blob for %s.%s.%lld column %d",
                     p->schemaName, p->shadowVectorChunksNames[col], chunk_id,
                     col);
      return SQLITE_ERROR;
    }

    void *zeros = sqlite3_malloc(vectorBytes);
    if (zeros == NULL) {
      sqlite3_blob_close(vectorsBlob);
      return SQLITE_NOMEM;
    }
    memset(zeros, 0, vectorBytes);

    int writeRc = sqlite3_blob_write(vectorsBlob, zeros, vectorBytes,
                                     chunk_offset * vectorBytes);
    sqlite3_free(zeros);
    if (writeRc != SQLITE_OK) {
      vtab_set_error(
          &p->base,
          "could not write to vector blob for %s.%s.%lld at %llu column %d",
          p->schemaName, p->shadowVectorChunksNames[col], chunk_id,
          chunk_offset, col);
    }

    // Always close the blob; the write error wins over a close error.
    int closeRc = sqlite3_blob_close(vectorsBlob);
    if (writeRc != SQLITE_OK) {
      return writeRc;
    }
    if (closeRc != SQLITE_OK) {
      vtab_set_error(&p->base,
                     "vec0 deletion error: Error commiting vector blob "
                     "transaction on %s.%s.%lld column %d",
                     p->schemaName, p->shadowVectorChunksNames[col], chunk_id,
                     col);
      return closeRc;
    }
  }
  return SQLITE_OK;
}
int vec0Update_Delete_DeleteChunkIfEmpty(vec0_vtab *p, i64 chunk_id,
int *deleted) {
int rc, brc;
sqlite3_blob *blobValidity = NULL;
*deleted = 0;
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "validity",
chunk_id, 0, &blobValidity);
if (rc != SQLITE_OK) {
vtab_set_error(&p->base,
"could not open validity blob for chunk %lld", chunk_id);
2024-04-20 13:38:58 -07:00
return SQLITE_ERROR;
}
int validitySize = sqlite3_blob_bytes(blobValidity);
unsigned char *validityBuf = sqlite3_malloc(validitySize);
if (!validityBuf) {
sqlite3_blob_close(blobValidity);
return SQLITE_NOMEM;
2024-04-20 13:38:58 -07:00
}
rc = sqlite3_blob_read(blobValidity, validityBuf, validitySize, 0);
brc = sqlite3_blob_close(blobValidity);
if (rc != SQLITE_OK) {
sqlite3_free(validityBuf);
return rc;
}
if (brc != SQLITE_OK) {
sqlite3_free(validityBuf);
return brc;
}
int allZero = 1;
for (int i = 0; i < validitySize; i++) {
if (validityBuf[i] != 0) {
allZero = 0;
break;
}
}
sqlite3_free(validityBuf);
if (!allZero) {
return SQLITE_OK;
}
// All validity bits are zero — delete this chunk and its associated data
char *zSql;
sqlite3_stmt *stmt;
2024-04-20 13:38:58 -07:00
// Delete from _chunks
zSql = sqlite3_mprintf(
"DELETE FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE rowid = ?",
p->schemaName, p->tableName);
if (!zSql)
return SQLITE_NOMEM;
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK)
return rc;
sqlite3_bind_int64(stmt, 1, chunk_id);
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE)
return SQLITE_ERROR;
2024-07-05 12:07:45 -07:00
// Delete from each _vector_chunksNN
for (int i = 0; i < p->numVectorColumns; i++) {
// Non-FLAT columns (rescore, IVF, DiskANN) don't use _vector_chunks
if (p->vector_columns[i].index_type != VEC0_INDEX_TYPE_FLAT)
continue;
zSql = sqlite3_mprintf(
"DELETE FROM " VEC0_SHADOW_VECTOR_N_NAME " WHERE rowid = ?",
p->schemaName, p->tableName, i);
if (!zSql)
return SQLITE_NOMEM;
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK)
return rc;
sqlite3_bind_int64(stmt, 1, chunk_id);
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE)
return SQLITE_ERROR;
}
#if SQLITE_VEC_ENABLE_RESCORE
rc = rescore_delete_chunk(p, chunk_id);
if (rc != SQLITE_OK)
return rc;
#endif
// Delete from each _metadatachunksNN
for (int i = 0; i < p->numMetadataColumns; i++) {
zSql = sqlite3_mprintf(
"DELETE FROM " VEC0_SHADOW_METADATA_N_NAME " WHERE rowid = ?",
p->schemaName, p->tableName, i);
if (!zSql)
return SQLITE_NOMEM;
rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
sqlite3_free(zSql);
if (rc != SQLITE_OK)
return rc;
sqlite3_bind_int64(stmt, 1, chunk_id);
rc = sqlite3_step(stmt);
sqlite3_finalize(stmt);
if (rc != SQLITE_DONE)
return SQLITE_ERROR;
}
// Invalidate cached stmtLatestChunk so it gets re-prepared on next insert
if (p->stmtLatestChunk) {
sqlite3_finalize(p->stmtLatestChunk);
p->stmtLatestChunk = NULL;
}
*deleted = 1;
return SQLITE_OK;
}
/**
 * Deletes the row with the given rowid from the shadow rowids table.
 *
 * @return SQLITE_OK when the DELETE steps to SQLITE_DONE, SQLITE_NOMEM when
 *         the SQL text cannot be allocated, otherwise the raw result code of
 *         the failing prepare or step.
 */
int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
  sqlite3_stmt *deleteStmt = NULL;
  char *sql = sqlite3_mprintf(
      "DELETE FROM " VEC0_SHADOW_ROWIDS_NAME " WHERE rowid = ?",
      p->schemaName, p->tableName);
  if (sql == NULL) {
    return SQLITE_NOMEM;
  }

  int result = sqlite3_prepare_v2(p->db, sql, -1, &deleteStmt, NULL);
  sqlite3_free(sql);

  if (result == SQLITE_OK) {
    sqlite3_bind_int64(deleteStmt, 1, rowid);
    result = sqlite3_step(deleteStmt);
    if (result == SQLITE_DONE) {
      result = SQLITE_OK;
    }
  }

  // Finalizing a NULL statement (failed prepare) is a harmless no-op.
  sqlite3_finalize(deleteStmt);
  return result;
}
/**
 * Deletes the row with the given rowid from the shadow auxiliary table.
 *
 * @return SQLITE_OK when the DELETE steps to SQLITE_DONE, SQLITE_NOMEM when
 *         the SQL text cannot be allocated, otherwise the raw result code of
 *         the failing prepare or step.
 */
int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
  sqlite3_stmt *deleteStmt = NULL;
  char *sql = sqlite3_mprintf(
      "DELETE FROM " VEC0_SHADOW_AUXILIARY_NAME " WHERE rowid = ?",
      p->schemaName, p->tableName);
  if (sql == NULL) {
    return SQLITE_NOMEM;
  }

  int result = sqlite3_prepare_v2(p->db, sql, -1, &deleteStmt, NULL);
  sqlite3_free(sql);

  if (result == SQLITE_OK) {
    sqlite3_bind_int64(deleteStmt, 1, rowid);
    result = sqlite3_step(deleteStmt);
    if (result == SQLITE_DONE) {
      result = SQLITE_OK;
    }
  }

  // Finalizing a NULL statement (failed prepare) is a harmless no-op.
  sqlite3_finalize(deleteStmt);
  return result;
}
/**
 * Resets the stored value of one metadata column for one chunk slot.
 *
 * Opens the "data" blob of row `chunk_id` in the metadata chunks shadow table
 * of column `metadata_idx` and, depending on the column kind:
 *  - BOOLEAN: clears bit `chunk_offset % CHAR_BIT` of byte
 *    `chunk_offset / CHAR_BIT`.
 *  - INTEGER: writes an i64 zero at `chunk_offset * sizeof(i64)`.
 *  - FLOAT: writes a double zero at `chunk_offset * sizeof(double)`.
 *  - TEXT: reads an int from the start of the slot, zeroes the whole
 *    VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH-byte slot, and, if that int exceeds
 *    VEC0_METADATA_TEXT_VIEW_DATA_LENGTH, deletes the row keyed by `rowid`
 *    from the column's text data shadow table.
 *
 * @param p            The vec0 virtual table.
 * @param metadata_idx Index into `p->metadata_columns`.
 * @param rowid        Rowid of the row being deleted (used for the text data
 *                     shadow table only).
 * @param chunk_id     Rowid of the chunk row.
 * @param chunk_offset Slot index inside the chunk.
 * @return SQLITE_OK on success. Otherwise the first error encountered; a
 *         blob-close error is reported only if everything before it succeeded.
 */
int vec0Update_Delete_ClearMetadata(vec0_vtab *p, int metadata_idx, i64 rowid, i64 chunk_id,
                                    u64 chunk_offset) {
  int rc;
  int rc2;
  sqlite3_blob *blobValue = NULL;
  vec0_metadata_column_kind kind = p->metadata_columns[metadata_idx].kind;

  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_idx], "data", chunk_id, 1, &blobValue);
  if (rc != SQLITE_OK) {
    return rc;
  }

  switch (kind) {
    case VEC0_METADATA_COLUMN_KIND_BOOLEAN: {
      u8 block;
      rc = sqlite3_blob_read(blobValue, &block, sizeof(u8), (int) (chunk_offset / CHAR_BIT));
      if (rc != SQLITE_OK) {
        goto done;
      }
      block &= ~(1 << (chunk_offset % CHAR_BIT));
      rc = sqlite3_blob_write(blobValue, &block, sizeof(u8), chunk_offset / CHAR_BIT);
      break;
    }
    case VEC0_METADATA_COLUMN_KIND_INTEGER: {
      i64 v = 0;
      rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(i64));
      break;
    }
    case VEC0_METADATA_COLUMN_KIND_FLOAT: {
      double v = 0;
      rc = sqlite3_blob_write(blobValue, &v, sizeof(v), chunk_offset * sizeof(double));
      break;
    }
    case VEC0_METADATA_COLUMN_KIND_TEXT: {
      // The leading int of the slot is read before the slot is wiped, because
      // it decides below whether the text data shadow table has a row too.
      int n;
      rc = sqlite3_blob_read(blobValue, &n, sizeof(int), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
      if (rc != SQLITE_OK) {
        goto done;
      }

      u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH];
      memset(view, 0, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
      rc = sqlite3_blob_write(blobValue, &view, sizeof(view), chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH);
      if (rc != SQLITE_OK) {
        goto done;
      }

      if (n > VEC0_METADATA_TEXT_VIEW_DATA_LENGTH) {
        char *zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_TEXT_DATA_NAME " WHERE rowid = ?", p->schemaName, p->tableName, metadata_idx);
        if (!zSql) {
          rc = SQLITE_NOMEM;
          goto done;
        }
        sqlite3_stmt *stmt;
        rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
        // The SQL text is no longer needed once prepared (or once preparation
        // has failed); release it on both paths so it is never leaked.
        sqlite3_free(zSql);
        if (rc != SQLITE_OK) {
          goto done;
        }
        sqlite3_bind_int64(stmt, 1, rowid);
        rc = sqlite3_step(stmt);
        sqlite3_finalize(stmt);
        if (rc != SQLITE_DONE) {
          rc = SQLITE_ERROR;
          goto done;
        }
        // Fix for https://github.com/asg017/sqlite-vec/issues/274
        // sqlite3_step returns SQLITE_DONE (101) on DML success, but the
        // `done:` epilogue treats anything other than SQLITE_OK as an error.
        // Without this, SQLITE_DONE propagates up to vec0Update_Delete,
        // which aborts the DELETE scan and silently drops remaining rows.
        rc = SQLITE_OK;
      }
      break;
    }
    default:
      // Any other kind: nothing is written, the blob is just closed below.
      break;
  }

done:
  rc2 = sqlite3_blob_close(blobValue);
  if (rc == SQLITE_OK) {
    return rc2;
  }
  return rc;
}
/**
 * Handles a DELETE of a single row of a vec0 virtual table.
 *
 * Resolves `idValue` to a rowid (through vec0_rowid_from_id() for text primary
 * keys, otherwise as an integer), then removes every trace of the row in this
 * order: DiskANN graph entries (when compiled in), chunk validity bit, chunk
 * rowid slot, chunk vector data, rescore data (when compiled in), the _rowids
 * row, the auxiliary row, metadata values, the chunk itself if it became
 * empty, and finally IVF entries (when compiled in). The chunk-related steps
 * are skipped when vec0_all_columns_diskann() reports true.
 *
 * @return SQLITE_OK on success, or the result code of the first failing step.
 */
int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
  vec0_vtab *vtab = (vec0_vtab *)pVTab;
  int status;
  i64 targetRowid;
  i64 chunkId = 0;
  i64 slot = 0;

  if (!vtab->pkIsText) {
    targetRowid = sqlite3_value_int64(idValue);
  } else {
    status = vec0_rowid_from_id(vtab, idValue, &targetRowid);
    if (status != SQLITE_OK) {
      return status;
    }
  }

  // Overview of the chunk-based steps performed below:
  // 1. Find chunk position for given rowid
  // 2. Ensure that validity bit for position is 1, then set to 0
  // 3. Zero out rowid in chunks.rowid
  // 4. Zero out vector data in all vector column chunks
  // 5. Delete value in _rowids table

#if SQLITE_VEC_ENABLE_DISKANN
  // DiskANN graph deletion for indexed columns
  for (int col = 0; col < vtab->numVectorColumns; col++) {
    if (vtab->vector_columns[col].index_type == VEC0_INDEX_TYPE_DISKANN) {
      status = diskann_delete(vtab, col, targetRowid);
      if (status != SQLITE_OK) {
        return status;
      }
    }
  }
#endif

  if (!vec0_all_columns_diskann(vtab)) {
    // 1. get chunk_id and chunk_offset from _rowids
    status = vec0_get_chunk_position(vtab, targetRowid, NULL, &chunkId, &slot);
    if (status != SQLITE_OK) {
      return status;
    }

    // 2. clear validity bit
    status = vec0Update_Delete_ClearValidity(vtab, chunkId, slot);
    if (status != SQLITE_OK) {
      return status;
    }

    // 3. zero out rowid in chunks.rowids
    status = vec0Update_Delete_ClearRowid(vtab, chunkId, slot);
    if (status != SQLITE_OK) {
      return status;
    }

    // 4. zero out any data in vector chunks tables
    status = vec0Update_Delete_ClearVectors(vtab, chunkId, slot);
    if (status != SQLITE_OK) {
      return status;
    }

#if SQLITE_VEC_ENABLE_RESCORE
    // 4b. zero out quantized data in rescore chunk tables, delete from rescore vectors
    status = rescore_on_delete(vtab, chunkId, slot, targetRowid);
    if (status != SQLITE_OK) {
      return status;
    }
#endif
  }

  // 5. delete from _rowids table
  status = vec0Update_Delete_DeleteRowids(vtab, targetRowid);
  if (status != SQLITE_OK) {
    return status;
  }

  // 6. delete any auxiliary rows
  if (vtab->numAuxiliaryColumns > 0) {
    status = vec0Update_Delete_DeleteAux(vtab, targetRowid);
    if (status != SQLITE_OK) {
      return status;
    }
  }

  // 7. delete metadata and reclaim chunk (only when using chunk-based storage)
  if (!vec0_all_columns_diskann(vtab)) {
    for (int col = 0; col < vtab->numMetadataColumns; col++) {
      status = vec0Update_Delete_ClearMetadata(vtab, col, targetRowid, chunkId,
                                               slot);
      if (status != SQLITE_OK) {
        return status;
      }
    }

    // 8. reclaim chunk if fully empty
    int chunkWasDeleted;
    status =
        vec0Update_Delete_DeleteChunkIfEmpty(vtab, chunkId, &chunkWasDeleted);
    if (status != SQLITE_OK) {
      return status;
    }
  }

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
  // 9. delete from IVF index
  for (int col = 0; col < vtab->numVectorColumns; col++) {
    if (vtab->vector_columns[col].index_type == VEC0_INDEX_TYPE_IVF) {
      status = ivf_delete(vtab, col, targetRowid);
      if (status != SQLITE_OK) {
        return status;
      }
    }
  }
#endif

  return SQLITE_OK;
}
/**
 * Updates one auxiliary column of one row in the shadow auxiliary table.
 *
 * Runs `UPDATE <auxiliary table> SET valueNN = ? WHERE rowid = ?`, where NN
 * is `auxiliary_column_idx` formatted with two digits.
 *
 * @param p                    The vec0 virtual table.
 * @param auxiliary_column_idx Index of the auxiliary column to update.
 * @param value                New value, bound as-is.
 * @param rowid                Rowid of the row to update.
 * @return SQLITE_OK on success, SQLITE_NOMEM if the SQL text cannot be
 *         allocated, the prepare error code if preparation fails, or
 *         SQLITE_ERROR if the statement does not step to SQLITE_DONE.
 */
int vec0Update_UpdateAuxColumn(vec0_vtab *p, int auxiliary_column_idx, sqlite3_value * value, i64 rowid) {
  int rc;
  sqlite3_stmt *stmt = NULL;
  char *zSql = sqlite3_mprintf("UPDATE " VEC0_SHADOW_AUXILIARY_NAME " SET value%02d = ? WHERE rowid = ?", p->schemaName, p->tableName, auxiliary_column_idx);
  if (!zSql) {
    return SQLITE_NOMEM;
  }

  rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL);
  // The SQL text came from sqlite3_mprintf() and must be released on every
  // path, including a failed prepare.
  sqlite3_free(zSql);
  if (rc != SQLITE_OK) {
    return rc;
  }

  sqlite3_bind_value(stmt, 1, value);
  sqlite3_bind_int64(stmt, 2, rowid);
  rc = sqlite3_step(stmt);
  sqlite3_finalize(stmt);
  if (rc != SQLITE_DONE) {
    return SQLITE_ERROR;
  }
  return SQLITE_OK;
}
/**
 * Replaces the stored vector of one vector column for one chunk slot.
 *
 * The new vector is parsed from `valueVector` and validated against the
 * column's element type and dimension count. For rescore columns (when
 * compiled in) the quantized chunk blob and the row of the rescore vectors
 * table are updated; for all other columns the vector is written into the
 * "vectors" blob of the column's vector chunks table.
 *
 * @param p            The vec0 virtual table.
 * @param chunk_id     Rowid of the chunk row holding the vector.
 * @param chunk_offset Slot index inside the chunk.
 * @param i            Index into `p->vector_columns`.
 * @param valueVector  SQL value holding the new vector.
 * @param rowid        Rowid of the row being updated (only used by the rescore
 *                     path).
 * @return SQLITE_OK on success. SQLITE_ERROR for an invalid vector, an element
 *         type mismatch or a dimension mismatch; otherwise the code of the
 *         failing SQLite call. An error message is stored on the vtab for
 *         validation and blob failures.
 */
int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset,
                                  int i, sqlite3_value *valueVector, i64 rowid) {
  int rc;
#if !SQLITE_VEC_ENABLE_RESCORE
  UNUSED_PARAMETER(rowid);
#endif

  sqlite3_blob *blobVectors = NULL;
  char *pzError = NULL;
  size_t dimensions;
  enum VectorElementType elementType;
  // Start from NULL so the cleanup call at the end never receives an
  // indeterminate pointer when parsing fails before `vector` is assigned.
  void *vector = NULL;
  vector_cleanup cleanup = vector_cleanup_noop;

  // https://github.com/asg017/sqlite-vec/issues/53
  rc = vector_from_value(valueVector, &vector, &dimensions, &elementType,
                         &cleanup, &pzError);
  if (rc != SQLITE_OK) {
    // IMP: V15203_32042
    vtab_set_error(
        &p->base, "Updated vector for the \"%.*s\" column is invalid: %z",
        p->vector_columns[i].name_length, p->vector_columns[i].name, pzError);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  if (elementType != p->vector_columns[i].element_type) {
    // IMP: V03643_20481
    vtab_set_error(
        &p->base,
        "Updated vector for the \"%.*s\" column is expected to be of type "
        "%s, but a %s vector was provided.",
        p->vector_columns[i].name_length, p->vector_columns[i].name,
        vector_subtype_name(p->vector_columns[i].element_type),
        vector_subtype_name(elementType));
    rc = SQLITE_ERROR;
    goto cleanup;
  }

  if (dimensions != p->vector_columns[i].dimensions) {
    // IMP: V25739_09810
    // `dimensions` is a size_t, which must not be passed to a "%d"
    // conversion; cast to i64 and print with "%lld" instead.
    vtab_set_error(
        &p->base,
        "Dimension mismatch for new updated vector for the \"%.*s\" column. "
        "Expected %lld dimensions but received %lld.",
        p->vector_columns[i].name_length, p->vector_columns[i].name,
        (i64)p->vector_columns[i].dimensions, (i64)dimensions);
    rc = SQLITE_ERROR;
    goto cleanup;
  }

#if SQLITE_VEC_ENABLE_RESCORE
  if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_RESCORE) {
    // For rescore columns, update _rescore_vectors and _rescore_chunks
    struct VectorColumnDefinition *col = &p->vector_columns[i];
    size_t qsize = rescore_quantized_byte_size(col);
    size_t fsize = vector_column_byte_size(*col);

    // 1. Update quantized chunk
    {
      void *qbuf = sqlite3_malloc(qsize);
      if (!qbuf) { rc = SQLITE_NOMEM; goto cleanup; }
      switch (col->rescore.quantizer_type) {
        case VEC0_RESCORE_QUANTIZER_BIT:
          rescore_quantize_float_to_bit((const float *)vector, (uint8_t *)qbuf, col->dimensions);
          break;
        case VEC0_RESCORE_QUANTIZER_INT8:
          rescore_quantize_float_to_int8((const float *)vector, (int8_t *)qbuf, col->dimensions);
          break;
      }
      sqlite3_blob *blobQ = NULL;
      rc = sqlite3_blob_open(p->db, p->schemaName,
                             p->shadowRescoreChunksNames[i], "vectors",
                             chunk_id, 1, &blobQ);
      if (rc != SQLITE_OK) { sqlite3_free(qbuf); goto cleanup; }
      rc = sqlite3_blob_write(blobQ, qbuf, qsize, chunk_offset * qsize);
      sqlite3_free(qbuf);
      int brc2 = sqlite3_blob_close(blobQ);
      if (rc != SQLITE_OK) goto cleanup;
      if (brc2 != SQLITE_OK) { rc = brc2; goto cleanup; }
    }

    // 2. Update float vector in _rescore_vectors (keyed by user rowid)
    {
      char *zSql = sqlite3_mprintf(
          "UPDATE \"%w\".\"%w\" SET vector = ? WHERE rowid = ?",
          p->schemaName, p->shadowRescoreVectorsNames[i]);
      if (!zSql) { rc = SQLITE_NOMEM; goto cleanup; }
      sqlite3_stmt *stmtUp;
      rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmtUp, NULL);
      sqlite3_free(zSql);
      if (rc != SQLITE_OK) goto cleanup;
      sqlite3_bind_blob(stmtUp, 1, vector, fsize, SQLITE_TRANSIENT);
      sqlite3_bind_int64(stmtUp, 2, rowid);
      rc = sqlite3_step(stmtUp);
      sqlite3_finalize(stmtUp);
      if (rc != SQLITE_DONE) { rc = SQLITE_ERROR; goto cleanup; }
    }

    rc = SQLITE_OK;
    goto cleanup;
  }
#endif

  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
                         "vectors", chunk_id, 1, &blobVectors);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
    goto cleanup;
  }

  rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, vector,
                                        p->vector_columns[i].dimensions,
                                        p->vector_columns[i].element_type);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
                   p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
    goto cleanup;
  }

cleanup:
  // Release the parsed vector and close the blob on every path. Closing a
  // NULL blob handle (validation failure or rescore path) is a no-op.
  cleanup(vector);
  int brc = sqlite3_blob_close(blobVectors);
  if (rc != SQLITE_OK) {
    return rc;
  }
  if (brc != SQLITE_OK) {
    vtab_set_error(
        &p->base,
        "Could not commit blob transaction for vectors blob for %s.%s.%lld",
        p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
    return brc;
  }
  return SQLITE_OK;
}
/**
 * Handles an UPDATE of a single row of a vec0 virtual table.
 *
 * `argv[0]` is the existing primary key and `argv[1]` the new one; for text
 * primary keys the two must be byte-for-byte equal. User column values start
 * at `argv[2 + VEC0_COLUMN_USERN_START]`. After locating the row's chunk
 * position, the columns are processed in four passes over the user columns:
 * partition keys (any change is rejected), auxiliary columns, metadata
 * columns, then vector columns.
 *
 * @return SQLITE_OK on success. SQLITE_ERROR for a rejected update or a failed
 *         auxiliary/vector column update; otherwise the result code of the
 *         failing lookup or metadata write.
 */
int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) {
  UNUSED_PARAMETER(argc);
  vec0_vtab *vtab = (vec0_vtab *)pVTab;
  sqlite3_value **userValues = &argv[2 + VEC0_COLUMN_USERN_START];
  int status;
  i64 chunkId;
  i64 slot;
  i64 targetRowid;

  if (!vtab->pkIsText) {
    targetRowid = sqlite3_value_int64(argv[0]);
  } else {
    const char *oldKey = (const char *)sqlite3_value_text(argv[0]);
    const char *newKey = (const char *)sqlite3_value_text(argv[1]);
    int oldKeyBytes = sqlite3_value_bytes(argv[0]);
    int newKeyBytes = sqlite3_value_bytes(argv[1]);
    // IMP: V08886_25725
    if (oldKeyBytes != newKeyBytes ||
        strncmp(oldKey, newKey, oldKeyBytes) != 0) {
      vtab_set_error(pVTab,
                     "UPDATEs on vec0 primary key values are not allowed.");
      return SQLITE_ERROR;
    }
    status = vec0_rowid_from_id(vtab, argv[0], &targetRowid);
    if (status != SQLITE_OK) {
      return status;
    }
  }

  // 1) get chunk_id and chunk_offset from _rowids
  status = vec0_get_chunk_position(vtab, targetRowid, NULL, &chunkId, &slot);
  if (status != SQLITE_OK) {
    return status;
  }

  // 2) update any partition key values
  for (int c = 0; c < vec0_num_defined_user_columns(vtab); c++) {
    if (vtab->user_column_kinds[c] == SQLITE_VEC0_USER_COLUMN_KIND_PARTITION) {
      if (!sqlite3_value_nochange(userValues[c])) {
        vtab_set_error(pVTab, "UPDATE on partition key columns are not supported yet. ");
        return SQLITE_ERROR;
      }
    }
  }

  // 3) handle auxiliary column updates
  for (int c = 0; c < vec0_num_defined_user_columns(vtab); c++) {
    if (vtab->user_column_kinds[c] == SQLITE_VEC0_USER_COLUMN_KIND_AUXILIARY) {
      int auxIdx = vtab->user_column_idxs[c];
      sqlite3_value *newValue = userValues[c];
      if (sqlite3_value_nochange(newValue)) {
        continue;
      }
      status = vec0Update_UpdateAuxColumn(vtab, auxIdx, newValue, targetRowid);
      if (status != SQLITE_OK) {
        return SQLITE_ERROR;
      }
    }
  }

  // 4) handle metadata column updates
  for (int c = 0; c < vec0_num_defined_user_columns(vtab); c++) {
    if (vtab->user_column_kinds[c] == SQLITE_VEC0_USER_COLUMN_KIND_METADATA) {
      int metaIdx = vtab->user_column_idxs[c];
      sqlite3_value *newValue = userValues[c];
      if (sqlite3_value_nochange(newValue)) {
        continue;
      }
      status = vec0_write_metadata_value(vtab, metaIdx, targetRowid, chunkId,
                                         slot, newValue, 1);
      if (status != SQLITE_OK) {
        return status;
      }
    }
  }

  // 5) iterate over all new vectors, update the vectors
  for (int c = 0; c < vec0_num_defined_user_columns(vtab); c++) {
    if (vtab->user_column_kinds[c] != SQLITE_VEC0_USER_COLUMN_KIND_VECTOR) {
      continue;
    }
    int vecIdx = vtab->user_column_idxs[c];
    sqlite3_value *newVector = userValues[c];

    // vec0Column checks sqlite3_vtab_nochange() on vector columns and returns
    // NULL when the column is not being changed. A NULL here is therefore
    // treated as "unchanged" and skipped. The downsides: vector columns can
    // never be NULLABLE (a real NULL is indistinguishable from nochange), and
    // `UPDATE v SET X = NULL` cannot be detected and rejected. A custom
    // result subtype for "empty" would be nicer, but subtypes don't appear to
    // survive xColumn -> xUpdate (always 0), so the docs warn against
    // SET X = NULL instead.
    if (sqlite3_value_type(newVector) == SQLITE_NULL) {
      continue;
    }

    // Block vector UPDATE for index types that don't implement it —
    // the DiskANN graph / IVF lists would become stale.
    enum Vec0IndexType indexType = vtab->vector_columns[vecIdx].index_type;
    const char *unsupportedIndex = NULL;
    if (indexType == VEC0_INDEX_TYPE_DISKANN) {
      unsupportedIndex = "DiskANN";
    }
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
    else if (indexType == VEC0_INDEX_TYPE_IVF) {
      unsupportedIndex = "IVF";
    }
#endif
    if (unsupportedIndex != NULL) {
      vtab_set_error(
          &vtab->base,
          "UPDATE on vector column \"%.*s\" is not supported for %s indexes.",
          vtab->vector_columns[vecIdx].name_length,
          vtab->vector_columns[vecIdx].name,
          unsupportedIndex);
      return SQLITE_ERROR;
    }

    status = vec0Update_UpdateVectorColumn(vtab, chunkId, slot, vecIdx,
                                           newVector, targetRowid);
    if (status != SQLITE_OK) {
      return SQLITE_ERROR;
    }
  }

  return SQLITE_OK;
}
/**
 * xUpdate entry point for the vec0 virtual table.
 *
 * Dispatches on the argument shape that SQLite hands to xUpdate:
 *   - argc == 1, argv[0] non-NULL  -> DELETE of the row identified by argv[0]
 *   - argc  > 1, argv[0] NULL      -> INSERT (or a "command insert", below)
 *   - argc  > 1, argv[0] non-NULL  -> UPDATE of the row identified by argv[0]
 * Any other shape sets an error message on the vtab and fails.
 *
 * @param pVTab  the vec0 virtual table
 * @param argc   number of entries in argv
 * @param argv   xUpdate argument vector
 * @param pRowid out-parameter forwarded to the INSERT handler
 * @return the handler's result code, SQLITE_NOMEM if the command text could
 *         not be materialized, or SQLITE_ERROR for an unrecognized shape
 */
static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
                      sqlite_int64 *pRowid) {
  // DELETE operation
  if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) {
    return vec0Update_Delete(pVTab, argv[0]);
  }

  // INSERT operation
  if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE || SQLITE_VEC_ENABLE_DISKANN
    // Check for command inserts: INSERT INTO t(rowid) VALUES ('command-string')
    // The id column holds the command string.
    sqlite3_value *idVal = argv[2 + VEC0_COLUMN_ID];
    if (sqlite3_value_type(idVal) == SQLITE_TEXT) {
      const char *cmd = (const char *)sqlite3_value_text(idVal);
      if (cmd == NULL) {
        // sqlite3_value_text() yields NULL for a TEXT value only when it runs
        // out of memory; never hand a NULL string to the command handlers.
        return SQLITE_NOMEM;
      }
      vec0_vtab *p = (vec0_vtab *)pVTab;
      // SQLITE_EMPTY is used as the "not a recognized command" marker.
      int cmdRc = SQLITE_EMPTY;
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
      cmdRc = ivf_handle_command(p, cmd, argc, argv);
#endif
#if SQLITE_VEC_ENABLE_DISKANN
      if (cmdRc == SQLITE_EMPTY) {
        cmdRc = diskann_handle_command(p, cmd);
      }
#endif
      if (cmdRc != SQLITE_EMPTY) {
        return cmdRc; // handled (or error)
      }
      // Not a recognized command: fall through to a normal insert.
    }
#endif
    return vec0Update_Insert(pVTab, argc, argv, pRowid);
  }

  // UPDATE operation (argc > 1 with a non-NULL argv[0]; the NULL case
  // already returned above)
  if (argc > 1) {
    return vec0Update_Update(pVTab, argc, argv);
  }

  vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0.");
  return SQLITE_ERROR;
}
/**
 * xShadowName: tell SQLite whether `zName` names one of vec0's shadow tables.
 *
 * The comparison is case-insensitive and must match a table entry exactly.
 *
 * @param zName candidate shadow-table suffix
 * @return 1 when `zName` is in the table below, 0 otherwise
 */
static int vec0ShadowName(const char *zName) {
  // NULL-terminated list of recognized suffixes.
  static const char *const kShadowSuffixes[] = {
      "rowids",
      "chunks",
      "auxiliary",
      "info",
      // One entry per metadata column slot (00..15); the list is meant to
      // cover up to VEC0_MAX_METADATA_COLUMNS columns.
      "metadatachunks00",
      "metadatachunks01",
      "metadatachunks02",
      "metadatachunks03",
      "metadatachunks04",
      "metadatachunks05",
      "metadatachunks06",
      "metadatachunks07",
      "metadatachunks08",
      "metadatachunks09",
      "metadatachunks10",
      "metadatachunks11",
      "metadatachunks12",
      "metadatachunks13",
      "metadatachunks14",
      "metadatachunks15",
      // Text-metadata shadow tables, same 00..15 slots.
      "metadatatext00",
      "metadatatext01",
      "metadatatext02",
      "metadatatext03",
      "metadatatext04",
      "metadatatext05",
      "metadatatext06",
      "metadatatext07",
      "metadatatext08",
      "metadatatext09",
      "metadatatext10",
      "metadatatext11",
      "metadatatext12",
      "metadatatext13",
      "metadatatext14",
      "metadatatext15",
      NULL,
  };

  // NOTE(review): a leftover note in this function mentioned "vector_chunks"
  // names, which are not listed above. Confirm whether per-vector-column
  // chunk tables should also be reported as shadow tables.
  for (const char *const *entry = kShadowSuffixes; *entry != NULL; ++entry) {
    if (sqlite3_stricmp(zName, *entry) == 0) {
      return 1;
    }
  }
  return 0;
}
/**
 * xBegin: transaction-start hook for vec0.
 *
 * Does no work; it only reports success.
 */
static int vec0Begin(sqlite3_vtab *pVTab) {
  UNUSED_PARAMETER(pVTab); // no per-transaction state to set up here
  return SQLITE_OK;
}
/**
 * xSync: release the prepared statements cached on the vtab.
 *
 * Finalizes the five cached statements below and resets each pointer to
 * NULL, so no dangling statement handle remains on the table object.
 * NOTE(review): presumably the statements are re-prepared on their next
 * use; confirm against the code that populates these fields.
 *
 * @param pVTab the vec0 virtual table
 * @return always SQLITE_OK
 */
static int vec0Sync(sqlite3_vtab *pVTab) {
  vec0_vtab *p = (vec0_vtab *)pVTab;

  // sqlite3_finalize(NULL) is a harmless no-op, so no NULL guards are needed.
  sqlite3_finalize(p->stmtLatestChunk);
  p->stmtLatestChunk = NULL;

  sqlite3_finalize(p->stmtRowidsInsertRowid);
  p->stmtRowidsInsertRowid = NULL;

  sqlite3_finalize(p->stmtRowidsInsertId);
  p->stmtRowidsInsertId = NULL;

  sqlite3_finalize(p->stmtRowidsUpdatePosition);
  p->stmtRowidsUpdatePosition = NULL;

  sqlite3_finalize(p->stmtRowidsGetChunkPosition);
  p->stmtRowidsGetChunkPosition = NULL;

  return SQLITE_OK;
}
/**
 * xCommit: transaction-commit hook for vec0.
 *
 * Does no work; it only reports success.
 */
static int vec0Commit(sqlite3_vtab *pVTab) {
  UNUSED_PARAMETER(pVTab); // nothing to finalize at commit time here
  return SQLITE_OK;
}
/**
 * xRollback: transaction-rollback hook for vec0.
 *
 * Does no work; it only reports success.
 */
static int vec0Rollback(sqlite3_vtab *pVTab) {
  UNUSED_PARAMETER(pVTab); // no in-memory state is undone here
  return SQLITE_OK;
}
// Method table for the vec0 virtual table module.
//
// Entries are positional, so their order must match the field order of
// sqlite3_module. Slots set to 0 are methods vec0 does not implement.
// xIntegrity is only present in the struct on SQLite >= 3.44.0, hence the
// version guard around the last slot.
static sqlite3_module vec0Module = {
    /* iVersion      */ 3,
    /* xCreate       */ vec0Create,
    /* xConnect      */ vec0Connect,
    /* xBestIndex    */ vec0BestIndex,
    /* xDisconnect   */ vec0Disconnect,
    /* xDestroy      */ vec0Destroy,
    /* xOpen         */ vec0Open,
    /* xClose        */ vec0Close,
    /* xFilter       */ vec0Filter,
    /* xNext         */ vec0Next,
    /* xEof          */ vec0Eof,
    /* xColumn       */ vec0Column,
    /* xRowid        */ vec0Rowid,
    /* xUpdate       */ vec0Update,
    /* xBegin        */ vec0Begin,
    /* xSync         */ vec0Sync,
    /* xCommit       */ vec0Commit,
    /* xRollback     */ vec0Rollback,
    /* xFindFunction */ 0,
    /* xRename       */ 0, // https://github.com/asg017/sqlite-vec/issues/43
    /* xSavepoint    */ 0,
    /* xRelease      */ 0,
    /* xRollbackTo   */ 0,
    /* xShadowName   */ vec0ShadowName,
#if SQLITE_VERSION_NUMBER >= 3044000
    /* xIntegrity    */ 0, // https://github.com/asg017/sqlite-vec/issues/44
#endif
};
#pragma endregion

// Build-feature tags for the vec_debug() report. Each
// SQLITE_VEC_DEBUG_BUILD_* macro expands to a short tag when the matching
// feature is compiled in and to "" otherwise. AVX and NEON are keyed on the
// macro being defined at all; the remaining features are keyed on its value.
#ifdef SQLITE_VEC_ENABLE_AVX
#define SQLITE_VEC_DEBUG_BUILD_AVX "avx"
#else
#define SQLITE_VEC_DEBUG_BUILD_AVX ""
#endif

#ifdef SQLITE_VEC_ENABLE_NEON
#define SQLITE_VEC_DEBUG_BUILD_NEON "neon"
#else
#define SQLITE_VEC_DEBUG_BUILD_NEON ""
#endif

#if SQLITE_VEC_ENABLE_RESCORE
#define SQLITE_VEC_DEBUG_BUILD_RESCORE "rescore"
#else
#define SQLITE_VEC_DEBUG_BUILD_RESCORE ""
#endif

#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
#define SQLITE_VEC_DEBUG_BUILD_IVF "ivf"
#else
#define SQLITE_VEC_DEBUG_BUILD_IVF ""
#endif

#if SQLITE_VEC_ENABLE_DISKANN
#define SQLITE_VEC_DEBUG_BUILD_DISKANN "diskann"
#else
#define SQLITE_VEC_DEBUG_BUILD_DISKANN ""
#endif

// Space-separated list of the tags above. A disabled feature contributes an
// empty tag, so its separator space is still emitted.
#define SQLITE_VEC_DEBUG_BUILD                                                 \
  SQLITE_VEC_DEBUG_BUILD_AVX " " SQLITE_VEC_DEBUG_BUILD_NEON " "               \
  SQLITE_VEC_DEBUG_BUILD_RESCORE " " SQLITE_VEC_DEBUG_BUILD_IVF " "            \
  SQLITE_VEC_DEBUG_BUILD_DISKANN

// Full text returned by the vec_debug() SQL function.
#define SQLITE_VEC_DEBUG_STRING                                                \
  "Version: " SQLITE_VEC_VERSION "\n"                                          \
  "Date: " SQLITE_VEC_DATE "\n"                                                \
  "Commit: " SQLITE_VEC_SOURCE "\n"                                            \
  "Build flags: " SQLITE_VEC_DEBUG_BUILD
SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
2024-08-10 23:33:28 -07:00
const sqlite3_api_routines *pApi) {
2024-09-20 13:17:57 -07:00
#ifndef SQLITE_CORE
2024-04-20 13:38:58 -07:00
SQLITE_EXTENSION_INIT2(pApi);
2024-09-20 13:17:57 -07:00
#endif
2024-04-20 13:38:58 -07:00
int rc = SQLITE_OK;
2024-07-05 12:07:45 -07:00
#define DEFAULT_FLAGS (SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC)
rc = sqlite3_create_function_v2(db, "vec_version", 0, DEFAULT_FLAGS,
SQLITE_VEC_VERSION, _static_text_func, NULL,
NULL, NULL);
if (rc != SQLITE_OK) {
2024-06-28 22:03:54 -07:00
return rc;
}
2024-07-05 12:07:45 -07:00
rc = sqlite3_create_function_v2(db, "vec_debug", 0, DEFAULT_FLAGS,
SQLITE_VEC_DEBUG_STRING, _static_text_func,
NULL, NULL, NULL);
if (rc != SQLITE_OK) {
2024-06-28 22:03:54 -07:00
return rc;
}
2024-06-28 21:38:50 -07:00
static struct {
2024-06-28 20:56:51 -07:00
const char *zFName;
2024-04-20 13:38:58 -07:00
void (*xFunc)(sqlite3_context *, int, sqlite3_value **);
int nArg;
int flags;
} aFunc[] = {
// clang-format off
2024-06-28 22:03:54 -07:00
//{"vec_version", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_VERSION },
//{"vec_debug", _static_text_func, 0, DEFAULT_FLAGS, (void *) SQLITE_VEC_DEBUG_STRING },
{"vec_distance_l2", vec_distance_l2, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
2024-07-23 14:04:17 -07:00
{"vec_distance_l1", vec_distance_l1, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
2024-06-28 22:03:54 -07:00
{"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
{"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
{"vec_length", vec_length, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
2024-07-22 21:24:44 -07:00
{"vec_type", vec_type, 1, DEFAULT_FLAGS, },
2024-06-28 22:03:54 -07:00
{"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_add", vec_add, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_sub", vec_sub, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_slice", vec_slice, 3, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_normalize", vec_normalize, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_f32", vec_f32, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_bit", vec_bit, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
{"vec_int8", vec_int8, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
2024-07-22 21:24:44 -07:00
{"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
2024-06-28 22:03:54 -07:00
{"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
2024-04-20 13:38:58 -07:00
// clang-format on
};
2024-06-28 21:38:50 -07:00
static struct {
2024-04-20 13:38:58 -07:00
char *name;
const sqlite3_module *module;
2024-06-13 16:32:57 -07:00
void *p;
void (*xDestroy)(void *);
2024-04-20 13:38:58 -07:00
} aMod[] = {
// clang-format off
{"vec0", &vec0Module, NULL, NULL},
{"vec_each", &vec_eachModule, NULL, NULL},
2024-04-20 13:38:58 -07:00
// clang-format on
};
2024-07-05 12:07:45 -07:00
for (unsigned long i = 0; i < countof(aFunc) && rc == SQLITE_OK; i++) {
2024-04-20 13:38:58 -07:00
rc = sqlite3_create_function_v2(db, aFunc[i].zFName, aFunc[i].nArg,
2024-07-05 12:07:45 -07:00
aFunc[i].flags, NULL, aFunc[i].xFunc, NULL,
NULL, NULL);
2024-04-20 13:38:58 -07:00
if (rc != SQLITE_OK) {
*pzErrMsg = sqlite3_mprintf("Error creating function %s: %s",
aFunc[i].zFName, sqlite3_errmsg(db));
return rc;
}
}
for (unsigned long i = 0; i < countof(aMod) && rc == SQLITE_OK; i++) {
rc = sqlite3_create_module_v2(db, aMod[i].name, aMod[i].module, NULL, NULL);
if (rc != SQLITE_OK) {
*pzErrMsg = sqlite3_mprintf("Error creating module %s: %s", aMod[i].name,
sqlite3_errmsg(db));
return rc;
}
}
2024-07-31 12:56:09 -07:00
2024-04-20 13:38:58 -07:00
return SQLITE_OK;
}
2024-07-31 12:56:09 -07:00