mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Add ANN search support for vec0 virtual table (#273)
Add approximate nearest neighbor infrastructure to vec0: shared distance dispatch (vec0_distance_full), flat index type with parser, NEON-optimized cosine/Hamming for float32/int8, amalgamation script, and benchmark suite (benchmarks-ann/) with ground-truth generation and profiling tools. Remove unused vec_npy_each/vec_static_blobs code, fix missing stdint.h include.
This commit is contained in:
parent
e9f598abfa
commit
0de765f457
27 changed files with 2177 additions and 2116 deletions
|
|
@@ -500,6 +500,83 @@ void test_vec0_parse_vector_column() {
|
|||
assert(rc == SQLITE_ERROR);
|
||||
}
|
||||
|
||||
// indexed by flat()
|
||||
{
|
||||
const char *input = "emb float[768] indexed by flat()";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_OK);
|
||||
assert(col.index_type == VEC0_INDEX_TYPE_FLAT);
|
||||
assert(col.dimensions == 768);
|
||||
sqlite3_free(col.name);
|
||||
}
|
||||
|
||||
// indexed by flat() with distance_metric
|
||||
{
|
||||
const char *input = "emb float[768] distance_metric=cosine indexed by flat()";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_OK);
|
||||
assert(col.index_type == VEC0_INDEX_TYPE_FLAT);
|
||||
assert(col.distance_metric == VEC0_DISTANCE_METRIC_COSINE);
|
||||
sqlite3_free(col.name);
|
||||
}
|
||||
|
||||
// indexed by flat() on int8
|
||||
{
|
||||
const char *input = "emb int8[256] indexed by flat()";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_OK);
|
||||
assert(col.index_type == VEC0_INDEX_TYPE_FLAT);
|
||||
assert(col.element_type == SQLITE_VEC_ELEMENT_TYPE_INT8);
|
||||
sqlite3_free(col.name);
|
||||
}
|
||||
|
||||
// indexed by flat() on bit
|
||||
{
|
||||
const char *input = "emb bit[64] indexed by flat()";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_OK);
|
||||
assert(col.index_type == VEC0_INDEX_TYPE_FLAT);
|
||||
assert(col.element_type == SQLITE_VEC_ELEMENT_TYPE_BIT);
|
||||
sqlite3_free(col.name);
|
||||
}
|
||||
|
||||
// default index_type is FLAT
|
||||
{
|
||||
const char *input = "emb float[768]";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_OK);
|
||||
assert(col.index_type == VEC0_INDEX_TYPE_FLAT);
|
||||
sqlite3_free(col.name);
|
||||
}
|
||||
|
||||
// Error: indexed by (missing type name)
|
||||
{
|
||||
const char *input = "emb float[768] indexed by";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_ERROR);
|
||||
}
|
||||
|
||||
// Error: indexed by unknown()
|
||||
{
|
||||
const char *input = "emb float[768] indexed by unknown()";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_ERROR);
|
||||
}
|
||||
|
||||
// Error: indexed by flat (missing parens)
|
||||
{
|
||||
const char *input = "emb float[768] indexed by flat";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_ERROR);
|
||||
}
|
||||
|
||||
// Error: indexed flat() (missing "by")
|
||||
{
|
||||
const char *input = "emb float[768] indexed flat()";
|
||||
rc = vec0_parse_vector_column(input, (int)strlen(input), &col);
|
||||
assert(rc == SQLITE_ERROR);
|
||||
}
|
||||
|
||||
printf(" All vec0_parse_vector_column tests passed.\n");
|
||||
}
|
||||
|
||||
|
|
@@ -656,6 +733,30 @@ void test_distance_hamming() {
|
|||
assert(d == 16.0f);
|
||||
}
|
||||
|
||||
// Large vector (256 bits = 32 bytes) — exercises NEON path on ARM
|
||||
{
|
||||
unsigned char a[32];
|
||||
unsigned char b[32];
|
||||
memset(a, 0xFF, 32);
|
||||
memset(b, 0x00, 32);
|
||||
d = _test_distance_hamming(a, b, 256);
|
||||
assert(d == 256.0f);
|
||||
}
|
||||
|
||||
// Large vector (1024 bits = 128 bytes) — exercises 64-byte NEON loop
|
||||
{
|
||||
unsigned char a[128];
|
||||
unsigned char b[128];
|
||||
memset(a, 0x00, 128);
|
||||
memset(b, 0x00, 128);
|
||||
// Set every other byte of a to 0xFF; b stays all zero -> 64 differing bytes * 8 bits per byte = 512
|
||||
for (int i = 0; i < 128; i += 2) {
|
||||
a[i] = 0xFF;
|
||||
}
|
||||
d = _test_distance_hamming(a, b, 1024);
|
||||
assert(d == 512.0f);
|
||||
}
|
||||
|
||||
printf(" All distance_hamming tests passed.\n");
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue