diff --git a/Makefile b/Makefile
index 21a814d..9e7c18f 100644
--- a/Makefile
+++ b/Makefile
@@ -190,7 +190,7 @@ test-loadable-watch:
 	watchexec --exts c,py,Makefile --clear -- make test-loadable
 
 test-unit:
-	$(CC) -DSQLITE_CORE tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
+	$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
 
 site-dev:
 	npm --prefix site run dev
diff --git a/sqlite-vec.c b/sqlite-vec.c
index 982e5f8..81d3c29 100644
--- a/sqlite-vec.c
+++ b/sqlite-vec.c
@@ -562,6 +562,23 @@ static f32 distance_hamming(const void *a, const void *b, const void *d) {
   return distance_hamming_u8((u8 *)a, (u8 *)b, dimensions / CHAR_BIT);
 }
 
+#ifdef SQLITE_VEC_TEST
+// Test-only entry points, compiled only when SQLITE_VEC_TEST is defined.
+// They give tests/test-unit.c a non-static, typed way to call the distance
+// routines (distance_hamming above is `static`). Each routine is handed the
+// dimension count by address, so the shims pass &dims.
+f32 _test_distance_l2_sqr_float(const f32 *a, const f32 *b, size_t dims) {
+  return distance_l2_sqr_float(a, b, &dims);
+}
+f32 _test_distance_cosine_float(const f32 *a, const f32 *b, size_t dims) {
+  return distance_cosine_float(a, b, &dims);
+}
+// dims is a bit count here: distance_hamming divides it by CHAR_BIT.
+f32 _test_distance_hamming(const u8 *a, const u8 *b, size_t dims) {
+  return distance_hamming(a, b, &dims);
+}
+#endif
+
 // from SQLite source:
 // https://github.com/sqlite/sqlite/blob/a509a90958ddb234d1785ed7801880ccb18b497e/src/json.c#L153
 static const char vecJsonIsSpaceX[] = {
diff --git a/tests/sqlite-vec-internal.h b/tests/sqlite-vec-internal.h
index 3a1f213..1e62da2 100644
--- a/tests/sqlite-vec-internal.h
+++ b/tests/sqlite-vec-internal.h
@@ -75,4 +75,12 @@ int vec0_parse_partition_key_definition(const char *source, int source_length,
     int *out_column_name_length,
     int *out_column_type);
 
+#ifdef SQLITE_VEC_TEST
+/* Test-only wrappers defined in sqlite-vec.c under the same SQLITE_VEC_TEST
+ * guard. dims is the dimension count; the hamming tests pass it in bits. */
+float _test_distance_l2_sqr_float(const float *a, const float *b, size_t dims);
+float _test_distance_cosine_float(const float *a, const float *b, size_t dims);
+float _test_distance_hamming(const unsigned char *a, const unsigned char *b, size_t dims);
+#endif
+
 #endif /* SQLITE_VEC_INTERNAL_H */
diff --git a/tests/test-unit.c b/tests/test-unit.c
index 959a083..2373e1c 100644
--- a/tests/test-unit.c
+++ b/tests/test-unit.c
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 #define countof(x) (sizeof(x) / sizeof((x)[0]))
 
@@ -477,11 +478,139 @@ void test_vec0_parse_partition_key_definition() {
   }
 }
 
+// Checks _test_distance_l2_sqr_float against hand-computed values. Despite the
+// "sqr" in the name, the expected results below are square-rooted distances.
+void test_distance_l2_sqr_float() {
+  printf("Starting %s...\n", __func__);
+  float d;
+
+  // Identical vectors: distance = 0
+  {
+    float a[] = {1.0f, 2.0f, 3.0f};
+    float b[] = {1.0f, 2.0f, 3.0f};
+    d = _test_distance_l2_sqr_float(a, b, 3);
+    assert(d == 0.0f);
+  }
+
+  // Orthogonal unit vectors: sqrt(1+1) = sqrt(2)
+  {
+    float a[] = {1.0f, 0.0f, 0.0f};
+    float b[] = {0.0f, 1.0f, 0.0f};
+    d = _test_distance_l2_sqr_float(a, b, 3);
+    assert(fabsf(d - sqrtf(2.0f)) < 1e-6f);
+  }
+
+  // Known computation: [1,2,3] vs [4,5,6] = sqrt(9+9+9) = sqrt(27)
+  {
+    float a[] = {1.0f, 2.0f, 3.0f};
+    float b[] = {4.0f, 5.0f, 6.0f};
+    d = _test_distance_l2_sqr_float(a, b, 3);
+    assert(fabsf(d - sqrtf(27.0f)) < 1e-5f);
+  }
+
+  // Single dimension: sqrt(16) = 4.0
+  {
+    float a[] = {3.0f};
+    float b[] = {7.0f};
+    d = _test_distance_l2_sqr_float(a, b, 1);
+    assert(d == 4.0f);
+  }
+
+  printf(" All distance_l2_sqr_float tests passed.\n");
+}
+
+// Checks cosine distance for parallel (0), orthogonal (1) and opposite (2)
+// vectors, within a small float tolerance.
+void test_distance_cosine_float() {
+  printf("Starting %s...\n", __func__);
+  float d;
+
+  // Identical direction: distance = 0.0
+  {
+    float a[] = {1.0f, 0.0f};
+    float b[] = {2.0f, 0.0f};
+    d = _test_distance_cosine_float(a, b, 2);
+    assert(fabsf(d - 0.0f) < 1e-6f);
+  }
+
+  // Orthogonal: distance = 1.0
+  {
+    float a[] = {1.0f, 0.0f};
+    float b[] = {0.0f, 1.0f};
+    d = _test_distance_cosine_float(a, b, 2);
+    assert(fabsf(d - 1.0f) < 1e-6f);
+  }
+
+  // Opposite direction: distance = 2.0
+  {
+    float a[] = {1.0f, 0.0f};
+    float b[] = {-1.0f, 0.0f};
+    d = _test_distance_cosine_float(a, b, 2);
+    assert(fabsf(d - 2.0f) < 1e-6f);
+  }
+
+  printf(" All distance_cosine_float tests passed.\n");
+}
+
+// Checks hamming distance on packed bit vectors; the last argument is the
+// number of bits (8 per byte).
+void test_distance_hamming() {
+  printf("Starting %s...\n", __func__);
+  float d;
+
+  // Identical bitmaps: distance = 0
+  {
+    unsigned char a[] = {0xFF};
+    unsigned char b[] = {0xFF};
+    d = _test_distance_hamming(a, b, 8);
+    assert(d == 0.0f);
+  }
+
+  // All different: distance = 8
+  {
+    unsigned char a[] = {0xFF};
+    unsigned char b[] = {0x00};
+    d = _test_distance_hamming(a, b, 8);
+    assert(d == 8.0f);
+  }
+
+  // Half different: 0xFF vs 0x0F = 4 bits differ
+  {
+    unsigned char a[] = {0xFF};
+    unsigned char b[] = {0x0F};
+    d = _test_distance_hamming(a, b, 8);
+    assert(d == 4.0f);
+  }
+
+  // Multi-byte: [0xFF, 0x00] vs [0x00, 0xFF] = 16 bits differ
+  {
+    unsigned char a[] = {0xFF, 0x00};
+    unsigned char b[] = {0x00, 0xFF};
+    d = _test_distance_hamming(a, b, 16);
+    assert(d == 16.0f);
+  }
+
+  printf(" All distance_hamming tests passed.\n");
+}
+
 int main() {
   printf("Starting unit tests...\n");
+  // Report which SIMD build flags are active for this test binary.
+#ifdef SQLITE_VEC_ENABLE_AVX
+  printf("SQLITE_VEC_ENABLE_AVX=1\n");
+#endif
+#ifdef SQLITE_VEC_ENABLE_NEON
+  printf("SQLITE_VEC_ENABLE_NEON=1\n");
+#endif
+#if !defined(SQLITE_VEC_ENABLE_AVX) && !defined(SQLITE_VEC_ENABLE_NEON)
+  printf("SIMD: none\n");
+#endif
   test_vec0_token_next();
   test_vec0_scanner();
   test_vec0_parse_vector_column();
   test_vec0_parse_partition_key_definition();
+  test_distance_l2_sqr_float();
+  test_distance_cosine_float();
+  test_distance_hamming();
   printf("All unit tests passed.\n");
 }