mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-24 16:26:37 +02:00
Remove vec_npy_each from default entrypoint and move to sqlite3_vec_numpy_init entrypoint
This commit is contained in:
parent
70dce09747
commit
763aad5d6a
6 changed files with 92 additions and 187 deletions
|
|
@ -2,7 +2,5 @@
|
|||
#include "sqlite-vec.h"
|
||||
#include <stdio.h>
|
||||
int core_init(const char *dummy) {
|
||||
int rc = sqlite3_auto_extension((void *)sqlite3_vec_init);
|
||||
if(rc != SQLITE_OK) return rc;
|
||||
return sqlite3_auto_extension((void *)sqlite3_vec_fs_read_init);
|
||||
return sqlite3_auto_extension((void *)sqlite3_vec_init);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -325,34 +325,6 @@ quantization:
|
|||
params: [vector, "[start]", "[end]"]
|
||||
desc: x
|
||||
example: select 'todo';
|
||||
|
||||
numpy:
|
||||
vec_npy_each:
|
||||
params: [npy_array]
|
||||
desc: |
|
||||
xxx
|
||||
example:
|
||||
- |
|
||||
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
||||
select
|
||||
rowid,
|
||||
vector,
|
||||
vec_type(vector),
|
||||
vec_to_json(vector)
|
||||
from vec_npy_each(
|
||||
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
||||
)
|
||||
- |
|
||||
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
||||
select
|
||||
rowid,
|
||||
vector,
|
||||
vec_type(vector),
|
||||
vec_to_json(vector)
|
||||
from vec_npy_each(
|
||||
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
||||
)
|
||||
|
||||
vec0:
|
||||
vec0:
|
||||
params: []
|
||||
|
|
@ -367,30 +339,3 @@ vec0:
|
|||
values (1, '[1, 1, 1, 1]'),
|
||||
(2, '[2, 2, 2, 2]'),
|
||||
(3, '[3, 3, 3, 3]');
|
||||
|
||||
entrypoints:
|
||||
{}
|
||||
#sqlite3_vec_init:
|
||||
# desc: |
|
||||
# asdf
|
||||
#sqlite3_vec_fs_read_init:
|
||||
# desc: |
|
||||
# asdf
|
||||
#table_functions:
|
||||
# vec_each:
|
||||
# columns: [rowid, value]
|
||||
# inputs: ["vector"]
|
||||
# desc:
|
||||
# example:
|
||||
#virtual_tables:
|
||||
# vec0:
|
||||
# desc:
|
||||
# example:
|
||||
#entrypoints:
|
||||
# sqlite3_vec_init: {}
|
||||
# sqlite3_vec_fs_read_init: {}
|
||||
#compile_options:
|
||||
# - SQLITE_VEC_ENABLE_AVX
|
||||
# - SQLITE_VEC_ENABLE_NEON
|
||||
# - SQLITE_VEC_OMIT_FS
|
||||
#
|
||||
|
|
|
|||
|
|
@ -4,51 +4,16 @@
|
|||
[Semantic Versioning](https://semver.org/), so "minor" release like "0.2.0" or
|
||||
"0.3.0" may contain breaking changes.
|
||||
|
||||
But what exactly counts as a "breaking change" in a SQLite extension? The line
|
||||
isn't so clear, unfortunately. Here are all the surfaces that COULD count as
|
||||
a "breaking change":
|
||||
Only SQL functions, table functions, and virtual tables that are defined in the default `sqlite3_vec_init` entrypoint are considered as the `sqlite-vec` API for semantic versioning. This means that other entrypoints and other SQL functions should be considered unstable, untested, and possibly dangerous.
|
||||
|
||||
- SQL functions and columns on virtual tables
|
||||
- The C API (extension entrypoints)
|
||||
- "Bindings" like the official `pip` and `npm` packages
|
||||
- Release assets like the pre-compiled extensions
|
||||
For the SQL API, a "breaking change" would include:
|
||||
|
||||
## What counts as a "breaking change"?
|
||||
- Removing a function or module
|
||||
- Changing the number or types of arguments for an SQL function
|
||||
- Changing the required arguments or argument positions of a table function
|
||||
- Changing the `CREATE VIRTUAL TABLE` constructor of a virtual table in a backwards-incompatible way
|
||||
- Removing columns from a virtual table or table function
|
||||
|
||||
### Changes to SQL functions
|
||||
|
||||
- Re-naming or removing an SQL function
|
||||
- Changing the number of required SQL parameters
|
||||
|
||||
### Changes to SQL virtual tables
|
||||
|
||||
- The number of
|
||||
|
||||
### Changes to the C API
|
||||
|
||||
Currently there is no "official" C API for `sqlite-vec`. However, there are
|
||||
entrypoints defined in C that C developers or developers using FFI can call. Any changes to these entrypoints would be a breaking change.
|
||||
|
||||
### Compile-time options
|
||||
|
||||
The removal of any compile time options
|
||||
|
||||
## When is `v1.0` coming?
|
||||
|
||||
In a few months! The main problems I want to solve before `v1.0` include:
|
||||
|
||||
- Metadata columns
|
||||
- Metadata filtering
|
||||
- ANN indexing
|
||||
- Quantization + pre-transformations
|
||||
|
||||
Once those items are complete, I will likely create a `v1.0` release, along with
|
||||
renaming the `vec0` virtual table module to `vec1`. And if future major releases
|
||||
are required, a `v2.0` major release will be made with new `vec2` virtual
|
||||
tables and so on.
|
||||
|
||||
Ideally, only a `v1` major release would be required. But who knows what the
|
||||
future has in store with vector search!
|
||||
|
||||
In general, I will try my best to maximize stability and limit the number of
|
||||
breaking changes for future `sqlite-vec` versions.
|
||||
The official "bindings" to `sqlite-vec`, including the Python/Node.js/Ruby/Go/Rust packages, are subject to change and are not covered by semantic versioning.
|
||||
That said, I have no plans to change or break them, and would include notes in changelogs if that ever needs to happen.
|
||||
|
|
|
|||
|
|
@ -7038,7 +7038,6 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
|||
// clang-format off
|
||||
{"vec0", &vec0Module, NULL, NULL},
|
||||
{"vec_each", &vec_eachModule, NULL, NULL},
|
||||
{"vec_npy_each", &vec_npy_eachModule, NULL, NULL},
|
||||
// clang-format on
|
||||
};
|
||||
|
||||
|
|
@ -7066,7 +7065,7 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
|||
}
|
||||
|
||||
#ifndef SQLITE_VEC_OMIT_FS
|
||||
SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
||||
SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
|
||||
const sqlite3_api_routines *pApi) {
|
||||
UNUSED_PARAMETER(pzErrMsg);
|
||||
#ifndef SQLITE_CORE
|
||||
|
|
@ -7075,6 +7074,10 @@ SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
|||
int rc = SQLITE_OK;
|
||||
rc = sqlite3_create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE,
|
||||
NULL, vec_npy_file, NULL, NULL, NULL);
|
||||
if(rc != SQLITE_OK) {
|
||||
return rc;
|
||||
}
|
||||
rc = sqlite3_create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL, NULL);
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -25,19 +25,9 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||
SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||
const sqlite3_api_routines *pApi);
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
||||
const sqlite3_api_routines *pApi);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of the 'extern "C"' block */
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -119,7 +119,6 @@ FUNCTIONS = [
|
|||
MODULES = [
|
||||
"vec0",
|
||||
"vec_each",
|
||||
"vec_npy_each",
|
||||
# "vec_static_blob_entries",
|
||||
# "vec_static_blobs",
|
||||
]
|
||||
|
|
@ -1619,6 +1618,7 @@ def to_npy(arr):
|
|||
|
||||
|
||||
def test_vec_npy_each():
|
||||
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
|
||||
vec_npy_each = lambda *args: execute_all(
|
||||
db, "select rowid, * from vec_npy_each(?)", args
|
||||
)
|
||||
|
|
@ -1651,6 +1651,7 @@ def test_vec_npy_each():
|
|||
|
||||
|
||||
def test_vec_npy_each_errors():
|
||||
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
|
||||
vec_npy_each = lambda *args: execute_all(
|
||||
db, "select rowid, * from vec_npy_each(?)", args
|
||||
)
|
||||
|
|
@ -1769,7 +1770,7 @@ import tempfile
|
|||
|
||||
|
||||
def test_vec_npy_each_errors_files():
|
||||
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_fs_read_init")
|
||||
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
|
||||
|
||||
def vec_npy_each(data):
|
||||
with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
|
||||
|
|
@ -2274,36 +2275,42 @@ def test_smoke():
|
|||
|
||||
db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
|
||||
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
||||
assert chunk[
|
||||
"rowids"
|
||||
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + bytearray(
|
||||
int(1024 * 8) - 8 * 2
|
||||
assert (
|
||||
chunk["rowids"]
|
||||
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
|
||||
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
|
||||
+ bytearray(int(1024 * 8) - 8 * 2)
|
||||
)
|
||||
assert chunk["chunk_id"] == 1
|
||||
assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
|
||||
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
||||
assert vchunk["rowid"] == 1
|
||||
assert vchunk[
|
||||
"vectors"
|
||||
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + bytearray(
|
||||
int(1024 * 4 * 2) - (2 * 4 * 2)
|
||||
assert (
|
||||
vchunk["vectors"]
|
||||
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
|
||||
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
|
||||
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 2))
|
||||
)
|
||||
|
||||
db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
|
||||
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
||||
assert chunk["chunk_id"] == 1
|
||||
assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
|
||||
assert chunk[
|
||||
"rowids"
|
||||
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x03\x00\x00\x00\x00\x00\x00\x00" + bytearray(
|
||||
int(1024 * 8) - 8 * 3
|
||||
assert (
|
||||
chunk["rowids"]
|
||||
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
|
||||
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
|
||||
+ b"\x03\x00\x00\x00\x00\x00\x00\x00"
|
||||
+ bytearray(int(1024 * 8) - 8 * 3)
|
||||
)
|
||||
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
||||
assert vchunk["rowid"] == 1
|
||||
assert vchunk[
|
||||
"vectors"
|
||||
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + b"\x00\x00\x00\x00\x00\x00\x80\xbf" + bytearray(
|
||||
int(1024 * 4 * 2) - (2 * 4 * 3)
|
||||
assert (
|
||||
vchunk["vectors"]
|
||||
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
|
||||
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
|
||||
+ b"\x00\x00\x00\x00\x00\x00\x80\xbf"
|
||||
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 3))
|
||||
)
|
||||
|
||||
# db.execute("select * from vec_xyz")
|
||||
|
|
@ -2346,66 +2353,63 @@ def test_vec0_stress_small_chunks():
|
|||
{"rowid": 994, "a": _f32([99.4] * 8)},
|
||||
{"rowid": 993, "a": _f32([99.3] * 8)},
|
||||
]
|
||||
assert (
|
||||
execute_all(
|
||||
db,
|
||||
"""
|
||||
assert execute_all(
|
||||
db,
|
||||
"""
|
||||
select rowid, a, distance
|
||||
from vec_small
|
||||
where a match ?
|
||||
and k = 9
|
||||
order by distance
|
||||
""",
|
||||
[_f32([50.0] * 8)],
|
||||
)
|
||||
== [
|
||||
{
|
||||
"a": _f32([500 * 0.1] * 8),
|
||||
"distance": 0.0,
|
||||
"rowid": 500,
|
||||
},
|
||||
{
|
||||
"a": _f32([501 * 0.1] * 8),
|
||||
"distance": 0.2828384041786194,
|
||||
"rowid": 501,
|
||||
},
|
||||
{
|
||||
"a": _f32([499 * 0.1] * 8),
|
||||
"distance": 0.2828384041786194,
|
||||
"rowid": 499,
|
||||
},
|
||||
{
|
||||
"a": _f32([502 * 0.1] * 8),
|
||||
"distance": 0.5656875967979431,
|
||||
"rowid": 502,
|
||||
},
|
||||
{
|
||||
"a": _f32([498 * 0.1] * 8),
|
||||
"distance": 0.5656875967979431,
|
||||
"rowid": 498,
|
||||
},
|
||||
{
|
||||
"a": _f32([503 * 0.1] * 8),
|
||||
"distance": 0.8485260009765625,
|
||||
"rowid": 503,
|
||||
},
|
||||
{
|
||||
"a": _f32([497 * 0.1] * 8),
|
||||
"distance": 0.8485260009765625,
|
||||
"rowid": 497,
|
||||
},
|
||||
{
|
||||
"a": _f32([496 * 0.1] * 8),
|
||||
"distance": 1.1313751935958862,
|
||||
"rowid": 496,
|
||||
},
|
||||
{
|
||||
"a": _f32([504 * 0.1] * 8),
|
||||
"distance": 1.1313751935958862,
|
||||
"rowid": 504,
|
||||
},
|
||||
]
|
||||
)
|
||||
[_f32([50.0] * 8)],
|
||||
) == [
|
||||
{
|
||||
"a": _f32([500 * 0.1] * 8),
|
||||
"distance": 0.0,
|
||||
"rowid": 500,
|
||||
},
|
||||
{
|
||||
"a": _f32([501 * 0.1] * 8),
|
||||
"distance": 0.2828384041786194,
|
||||
"rowid": 501,
|
||||
},
|
||||
{
|
||||
"a": _f32([499 * 0.1] * 8),
|
||||
"distance": 0.2828384041786194,
|
||||
"rowid": 499,
|
||||
},
|
||||
{
|
||||
"a": _f32([502 * 0.1] * 8),
|
||||
"distance": 0.5656875967979431,
|
||||
"rowid": 502,
|
||||
},
|
||||
{
|
||||
"a": _f32([498 * 0.1] * 8),
|
||||
"distance": 0.5656875967979431,
|
||||
"rowid": 498,
|
||||
},
|
||||
{
|
||||
"a": _f32([503 * 0.1] * 8),
|
||||
"distance": 0.8485260009765625,
|
||||
"rowid": 503,
|
||||
},
|
||||
{
|
||||
"a": _f32([497 * 0.1] * 8),
|
||||
"distance": 0.8485260009765625,
|
||||
"rowid": 497,
|
||||
},
|
||||
{
|
||||
"a": _f32([496 * 0.1] * 8),
|
||||
"distance": 1.1313751935958862,
|
||||
"rowid": 496,
|
||||
},
|
||||
{
|
||||
"a": _f32([504 * 0.1] * 8),
|
||||
"distance": 1.1313751935958862,
|
||||
"rowid": 504,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def test_vec0_distance_metric():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue