Add ANN search support for vec0 virtual table (#273)

Add approximate nearest neighbor infrastructure to vec0: shared distance
dispatch (vec0_distance_full), flat index type with parser, NEON-optimized
cosine/Hamming for float32/int8, amalgamation script, and benchmark suite
(benchmarks-ann/) with ground-truth generation and profiling tools. Remove
unused vec_npy_each/vec_static_blobs code, fix missing stdint.h include.
This commit is contained in:
Alex Garcia 2026-03-31 01:03:32 -07:00 committed by GitHub
parent e9f598abfa
commit 0de765f457
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 2177 additions and 2116 deletions

View file

@@ -568,65 +568,6 @@ select 'todo';
-- 'todo'
```
## NumPy Utilities {#numpy}
Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
### `vec_npy_each(vector)` {#vec_npy_each}
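A table function that reads a NumPy array serialized in the `.npy` format and returns one row per vector, with `rowid` and `vector` columns. In the example below, the input blob is a `float32` array of shape `(3, 1)`, so the result is three 1-dimensional `float32` vectors.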
```sql
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
select
rowid,
vector,
vec_type(vector),
vec_to_json(vector)
from vec_npy_each(
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
)
/*
┌───────┬─────────────┬──────────────────┬─────────────────────┐
│ rowid │ vector │ vec_type(vector) │ vec_to_json(vector) │
├───────┼─────────────┼──────────────────┼─────────────────────┤
│ 0 │ X'0000803F' │ 'float32' │ '[1.000000]' │
├───────┼─────────────┼──────────────────┼─────────────────────┤
│ 1 │ X'00000040' │ 'float32' │ '[2.000000]' │
├───────┼─────────────┼──────────────────┼─────────────────────┤
│ 2 │ X'00004040' │ 'float32' │ '[3.000000]' │
└───────┴─────────────┴──────────────────┴─────────────────────┘
*/
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
select
rowid,
vector,
vec_type(vector),
vec_to_json(vector)
from vec_npy_each(
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
)
/*
┌───────┬─────────────┬──────────────────┬─────────────────────┐
│ rowid │ vector │ vec_type(vector) │ vec_to_json(vector) │
├───────┼─────────────┼──────────────────┼─────────────────────┤
│ 0 │ X'0000803F' │ 'float32' │ '[1.000000]' │
├───────┼─────────────┼──────────────────┼─────────────────────┤
│ 1 │ X'00000040' │ 'float32' │ '[2.000000]' │
├───────┼─────────────┼──────────────────┼─────────────────────┤
│ 2 │ X'00004040' │ 'float32' │ '[3.000000]' │
└───────┴─────────────┴──────────────────┴─────────────────────┘
*/
```
## Meta {#meta}