mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Remove vec_npy_each from default entrypoint and move to sqlite3_vec_numpy_init entrypoint
This commit is contained in:
parent
70dce09747
commit
763aad5d6a
6 changed files with 92 additions and 187 deletions
|
|
@ -2,7 +2,5 @@
|
||||||
#include "sqlite-vec.h"
|
#include "sqlite-vec.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
int core_init(const char *dummy) {
|
int core_init(const char *dummy) {
|
||||||
int rc = sqlite3_auto_extension((void *)sqlite3_vec_init);
|
return sqlite3_auto_extension((void *)sqlite3_vec_init);
|
||||||
if(rc != SQLITE_OK) return rc;
|
|
||||||
return sqlite3_auto_extension((void *)sqlite3_vec_fs_read_init);
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -325,34 +325,6 @@ quantization:
|
||||||
params: [vector, "[start]", "[end]"]
|
params: [vector, "[start]", "[end]"]
|
||||||
desc: x
|
desc: x
|
||||||
example: select 'todo';
|
example: select 'todo';
|
||||||
|
|
||||||
numpy:
|
|
||||||
vec_npy_each:
|
|
||||||
params: [npy_array]
|
|
||||||
desc: |
|
|
||||||
xxx
|
|
||||||
example:
|
|
||||||
- |
|
|
||||||
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
|
||||||
select
|
|
||||||
rowid,
|
|
||||||
vector,
|
|
||||||
vec_type(vector),
|
|
||||||
vec_to_json(vector)
|
|
||||||
from vec_npy_each(
|
|
||||||
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
|
||||||
)
|
|
||||||
- |
|
|
||||||
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
|
||||||
select
|
|
||||||
rowid,
|
|
||||||
vector,
|
|
||||||
vec_type(vector),
|
|
||||||
vec_to_json(vector)
|
|
||||||
from vec_npy_each(
|
|
||||||
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
|
||||||
)
|
|
||||||
|
|
||||||
vec0:
|
vec0:
|
||||||
vec0:
|
vec0:
|
||||||
params: []
|
params: []
|
||||||
|
|
@ -367,30 +339,3 @@ vec0:
|
||||||
values (1, '[1, 1, 1, 1]'),
|
values (1, '[1, 1, 1, 1]'),
|
||||||
(2, '[2, 2, 2, 2]'),
|
(2, '[2, 2, 2, 2]'),
|
||||||
(3, '[3, 3, 3, 3]');
|
(3, '[3, 3, 3, 3]');
|
||||||
|
|
||||||
entrypoints:
|
|
||||||
{}
|
|
||||||
#sqlite3_vec_init:
|
|
||||||
# desc: |
|
|
||||||
# asdf
|
|
||||||
#sqlite3_vec_fs_read_init:
|
|
||||||
# desc: |
|
|
||||||
# asdf
|
|
||||||
#table_functions:
|
|
||||||
# vec_each:
|
|
||||||
# columns: [rowid, value]
|
|
||||||
# inputs: ["vector"]
|
|
||||||
# desc:
|
|
||||||
# example:
|
|
||||||
#virtual_tables:
|
|
||||||
# vec0:
|
|
||||||
# desc:
|
|
||||||
# example:
|
|
||||||
#entrypoints:
|
|
||||||
# sqlite3_vec_init: {}
|
|
||||||
# sqlite3_vec_fs_read_init: {}
|
|
||||||
#compile_options:
|
|
||||||
# - SQLITE_VEC_ENABLE_AVX
|
|
||||||
# - SQLITE_VEC_ENABLE_NEON
|
|
||||||
# - SQLITE_VEC_OMIT_FS
|
|
||||||
#
|
|
||||||
|
|
|
||||||
|
|
@ -4,51 +4,16 @@
|
||||||
[Semantic Versioning](https://semver.org/), so "minor" release like "0.2.0" or
|
[Semantic Versioning](https://semver.org/), so "minor" release like "0.2.0" or
|
||||||
"0.3.0" may contain breaking changes.
|
"0.3.0" may contain breaking changes.
|
||||||
|
|
||||||
But what exactly counts as a "breaking change" in a SQLite extension? The line
|
Only SQL functions, table functions, and virtual tables that are defined in the default `sqlite3_vec_init` entrypoint are considered as the `sqlite-vec` API for semantic versioning. This means that other entrypoints and other SQL functions should be considered unstable, untested, and possibly dangerous.
|
||||||
isn't so clear, unfortunately. Here are all the surfaces that COULD count as
|
|
||||||
a "breaking change":
|
|
||||||
|
|
||||||
- SQL functions and columns on virtual tables
|
For the SQL API, a "breaking change" would include:
|
||||||
- The C API (extension entrypoints)
|
|
||||||
- "Bindings" like the official `pip` and `npm` packages
|
|
||||||
- Release assets like the pre-compiled extensions
|
|
||||||
|
|
||||||
## What counts as a "breaking change"?
|
- Removing a function or module
|
||||||
|
- Changing the number or types of arguments for an SQL function
|
||||||
|
- Changing the required arguments, or their positions, of a table function
|
||||||
|
- Changing the `CREATE VIRTUAL TABLE` constructor of a virtual table in a backwards-incompatible way
|
||||||
|
- Removing columns from a virtual table or table function
|
||||||
|
|
||||||
### Changes to SQL functions
|
|
||||||
|
|
||||||
- Re-naming or removing an SQL function
|
The official "bindings" to `sqlite-vec`, including the Python/Node.js/Ruby/Go/Rust packages, are subject to change and are not covered by semantic versioning.
|
||||||
- Changing the number of required SQL parameters
|
That said, I have no plans to change or break them, and would include notes in the changelog if that ever needs to happen.
|
||||||
|
|
||||||
### Changes to SQL virtual tables
|
|
||||||
|
|
||||||
- The number of
|
|
||||||
|
|
||||||
### Changes to the C API
|
|
||||||
|
|
||||||
Currently there is no "official" C API for `sqlite-vec`. However, there are
|
|
||||||
entrypoints defined in C that C developers or developers using FFI can call. Any changes to these entrypoints would be a breaking change.
|
|
||||||
|
|
||||||
### Compile-time options
|
|
||||||
|
|
||||||
The removal of any compile time options
|
|
||||||
|
|
||||||
## When is `v1.0` coming?
|
|
||||||
|
|
||||||
In a few months! The main problems I want to solve before `v1.0` include:
|
|
||||||
|
|
||||||
- Metadata columns
|
|
||||||
- Metadata filtering
|
|
||||||
- ANN indexing
|
|
||||||
- Quantization + pre-transformations
|
|
||||||
|
|
||||||
Once those items are complete, I will likely create a `v1.0` release, along with
|
|
||||||
renaming the `vec0` virtual table module to `vec1`. And if future major releases
|
|
||||||
are required, a `v2.0` major release will be made with new `vec2` virtual
|
|
||||||
tables and so on.
|
|
||||||
|
|
||||||
Ideally, only a `v1` major release would be required. But who knows what the
|
|
||||||
future has in store with vector search!
|
|
||||||
|
|
||||||
In general, I will try my best to maximize stability and limit the number of
|
|
||||||
breaking changes for future `sqlite-vec` versions.
|
|
||||||
|
|
|
||||||
|
|
@ -7038,7 +7038,6 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||||
// clang-format off
|
// clang-format off
|
||||||
{"vec0", &vec0Module, NULL, NULL},
|
{"vec0", &vec0Module, NULL, NULL},
|
||||||
{"vec_each", &vec_eachModule, NULL, NULL},
|
{"vec_each", &vec_eachModule, NULL, NULL},
|
||||||
{"vec_npy_each", &vec_npy_eachModule, NULL, NULL},
|
|
||||||
// clang-format on
|
// clang-format on
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -7066,7 +7065,7 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef SQLITE_VEC_OMIT_FS
|
#ifndef SQLITE_VEC_OMIT_FS
|
||||||
SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
|
||||||
const sqlite3_api_routines *pApi) {
|
const sqlite3_api_routines *pApi) {
|
||||||
UNUSED_PARAMETER(pzErrMsg);
|
UNUSED_PARAMETER(pzErrMsg);
|
||||||
#ifndef SQLITE_CORE
|
#ifndef SQLITE_CORE
|
||||||
|
|
@ -7075,6 +7074,10 @@ SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
||||||
int rc = SQLITE_OK;
|
int rc = SQLITE_OK;
|
||||||
rc = sqlite3_create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE,
|
rc = sqlite3_create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE,
|
||||||
NULL, vec_npy_file, NULL, NULL, NULL);
|
NULL, vec_npy_file, NULL, NULL, NULL);
|
||||||
|
if(rc != SQLITE_OK) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
rc = sqlite3_create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL, NULL);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -25,19 +25,9 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _WIN32
|
SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
||||||
__declspec(dllexport)
|
|
||||||
#endif
|
|
||||||
int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
|
|
||||||
const sqlite3_api_routines *pApi);
|
const sqlite3_api_routines *pApi);
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
__declspec(dllexport)
|
|
||||||
#endif
|
|
||||||
int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
|
|
||||||
const sqlite3_api_routines *pApi);
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* end of the 'extern "C"' block */
|
} /* end of the 'extern "C"' block */
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -119,7 +119,6 @@ FUNCTIONS = [
|
||||||
MODULES = [
|
MODULES = [
|
||||||
"vec0",
|
"vec0",
|
||||||
"vec_each",
|
"vec_each",
|
||||||
"vec_npy_each",
|
|
||||||
# "vec_static_blob_entries",
|
# "vec_static_blob_entries",
|
||||||
# "vec_static_blobs",
|
# "vec_static_blobs",
|
||||||
]
|
]
|
||||||
|
|
@ -1619,6 +1618,7 @@ def to_npy(arr):
|
||||||
|
|
||||||
|
|
||||||
def test_vec_npy_each():
|
def test_vec_npy_each():
|
||||||
|
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
|
||||||
vec_npy_each = lambda *args: execute_all(
|
vec_npy_each = lambda *args: execute_all(
|
||||||
db, "select rowid, * from vec_npy_each(?)", args
|
db, "select rowid, * from vec_npy_each(?)", args
|
||||||
)
|
)
|
||||||
|
|
@ -1651,6 +1651,7 @@ def test_vec_npy_each():
|
||||||
|
|
||||||
|
|
||||||
def test_vec_npy_each_errors():
|
def test_vec_npy_each_errors():
|
||||||
|
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
|
||||||
vec_npy_each = lambda *args: execute_all(
|
vec_npy_each = lambda *args: execute_all(
|
||||||
db, "select rowid, * from vec_npy_each(?)", args
|
db, "select rowid, * from vec_npy_each(?)", args
|
||||||
)
|
)
|
||||||
|
|
@ -1769,7 +1770,7 @@ import tempfile
|
||||||
|
|
||||||
|
|
||||||
def test_vec_npy_each_errors_files():
|
def test_vec_npy_each_errors_files():
|
||||||
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_fs_read_init")
|
db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
|
||||||
|
|
||||||
def vec_npy_each(data):
|
def vec_npy_each(data):
|
||||||
with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
|
with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
|
||||||
|
|
@ -2274,36 +2275,42 @@ def test_smoke():
|
||||||
|
|
||||||
db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
|
db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
|
||||||
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
||||||
assert chunk[
|
assert (
|
||||||
"rowids"
|
chunk["rowids"]
|
||||||
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + bytearray(
|
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
|
||||||
int(1024 * 8) - 8 * 2
|
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
|
||||||
|
+ bytearray(int(1024 * 8) - 8 * 2)
|
||||||
)
|
)
|
||||||
assert chunk["chunk_id"] == 1
|
assert chunk["chunk_id"] == 1
|
||||||
assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
|
assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
|
||||||
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
||||||
assert vchunk["rowid"] == 1
|
assert vchunk["rowid"] == 1
|
||||||
assert vchunk[
|
assert (
|
||||||
"vectors"
|
vchunk["vectors"]
|
||||||
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + bytearray(
|
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
|
||||||
int(1024 * 4 * 2) - (2 * 4 * 2)
|
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
|
||||||
|
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 2))
|
||||||
)
|
)
|
||||||
|
|
||||||
db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
|
db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
|
||||||
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
chunk = db.execute("select * from vec_xyz_chunks").fetchone()
|
||||||
assert chunk["chunk_id"] == 1
|
assert chunk["chunk_id"] == 1
|
||||||
assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
|
assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
|
||||||
assert chunk[
|
assert (
|
||||||
"rowids"
|
chunk["rowids"]
|
||||||
] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x03\x00\x00\x00\x00\x00\x00\x00" + bytearray(
|
== b"\x01\x00\x00\x00\x00\x00\x00\x00"
|
||||||
int(1024 * 8) - 8 * 3
|
+ b"\x02\x00\x00\x00\x00\x00\x00\x00"
|
||||||
|
+ b"\x03\x00\x00\x00\x00\x00\x00\x00"
|
||||||
|
+ bytearray(int(1024 * 8) - 8 * 3)
|
||||||
)
|
)
|
||||||
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
|
||||||
assert vchunk["rowid"] == 1
|
assert vchunk["rowid"] == 1
|
||||||
assert vchunk[
|
assert (
|
||||||
"vectors"
|
vchunk["vectors"]
|
||||||
] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + b"\x00\x00\x00\x00\x00\x00\x80\xbf" + bytearray(
|
== b"\x00\x00\x00\x00\x00\x00\x80\x3f"
|
||||||
int(1024 * 4 * 2) - (2 * 4 * 3)
|
+ b"\x00\x00\x00\x00\x00\x00\x00\x40"
|
||||||
|
+ b"\x00\x00\x00\x00\x00\x00\x80\xbf"
|
||||||
|
+ bytearray(int(1024 * 4 * 2) - (2 * 4 * 3))
|
||||||
)
|
)
|
||||||
|
|
||||||
# db.execute("select * from vec_xyz")
|
# db.execute("select * from vec_xyz")
|
||||||
|
|
@ -2346,8 +2353,7 @@ def test_vec0_stress_small_chunks():
|
||||||
{"rowid": 994, "a": _f32([99.4] * 8)},
|
{"rowid": 994, "a": _f32([99.4] * 8)},
|
||||||
{"rowid": 993, "a": _f32([99.3] * 8)},
|
{"rowid": 993, "a": _f32([99.3] * 8)},
|
||||||
]
|
]
|
||||||
assert (
|
assert execute_all(
|
||||||
execute_all(
|
|
||||||
db,
|
db,
|
||||||
"""
|
"""
|
||||||
select rowid, a, distance
|
select rowid, a, distance
|
||||||
|
|
@ -2357,8 +2363,7 @@ def test_vec0_stress_small_chunks():
|
||||||
order by distance
|
order by distance
|
||||||
""",
|
""",
|
||||||
[_f32([50.0] * 8)],
|
[_f32([50.0] * 8)],
|
||||||
)
|
) == [
|
||||||
== [
|
|
||||||
{
|
{
|
||||||
"a": _f32([500 * 0.1] * 8),
|
"a": _f32([500 * 0.1] * 8),
|
||||||
"distance": 0.0,
|
"distance": 0.0,
|
||||||
|
|
@ -2405,7 +2410,6 @@ def test_vec0_stress_small_chunks():
|
||||||
"rowid": 504,
|
"rowid": 504,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_vec0_distance_metric():
|
def test_vec0_distance_metric():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue