Remove vec_npy_each from default entrypoint and move to sqlite3_vec_numpy_init entrypoint

2026-04-25 00:36:56 +02:00 · 2024-09-25 23:07:17 -07:00 · 2024-09-25 23:07:17 -07:00 · 763aad5d6a
commit 763aad5d6a
parent 70dce09747
6 changed files with 92 additions and 187 deletions
--- a/examples/sqlite3-cli/core_init.c
+++ b/examples/sqlite3-cli/core_init.c
@ -2,7 +2,5 @@
 #include "sqlite-vec.h"
 #include <stdio.h>
 int core_init(const char *dummy) {
-  int rc = sqlite3_auto_extension((void *)sqlite3_vec_init);
+  return sqlite3_auto_extension((void *)sqlite3_vec_init);
  if(rc != SQLITE_OK) return rc;
  return sqlite3_auto_extension((void *)sqlite3_vec_fs_read_init);
 }
--- a/reference.yaml
+++ b/reference.yaml
@ -325,34 +325,6 @@ quantization:
    params: [vector, "[start]", "[end]"]
    desc: x
    example: select 'todo';
 numpy:
  vec_npy_each:
    params: [npy_array]
    desc: |
      xxx
    example:
      - |
        -- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
        select
          rowid,
          vector,
          vec_type(vector),
          vec_to_json(vector)
        from vec_npy_each(
          X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
        )
      - |
        -- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
        select
          rowid,
          vector,
          vec_type(vector),
          vec_to_json(vector)
        from vec_npy_each(
          X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
        )
 vec0:
  vec0:
    params: []
@ -367,30 +339,3 @@ vec0:
        values (1, '[1, 1, 1, 1]'),
          (2, '[2, 2, 2, 2]'),
          (3, '[3, 3, 3, 3]');
 entrypoints:
  {}
  #sqlite3_vec_init:
  #  desc: |
  #    asdf
  #sqlite3_vec_fs_read_init:
  #  desc: |
  #    asdf
 #table_functions:
 #  vec_each:
 #    columns: [rowid, value]
 #    inputs: ["vector"]
 #    desc:
 #    example:
 #virtual_tables:
 #  vec0:
 #    desc:
 #    example:
 #entrypoints:
 #  sqlite3_vec_init: {}
 #  sqlite3_vec_fs_read_init: {}
 #compile_options:
 #  - SQLITE_VEC_ENABLE_AVX
 #  - SQLITE_VEC_ENABLE_NEON
 #  - SQLITE_VEC_OMIT_FS
 #
--- a/site/versioning.md
+++ b/site/versioning.md
@ -4,51 +4,16 @@
 [Semantic Versioning](https://semver.org/), so a "minor" release like "0.2.0" or
 "0.3.0" may contain breaking changes.
-But what exactly counts as a "breaking change" in a SQLite extension? The line
+Only SQL functions, table functions, and virtual tables that are defined in the default `sqlite3_vec_init` entrypoint are considered as the `sqlite-vec` API for semantic versioning. This means that other entrypoints and other SQL functions should be considered unstable, untested, and possibly dangerous.
 isn't so clear, unfortunately. Here are all the surfaces that COULD count as
 a "breaking change":
- SQL functions and columns on virtual tables
+For the SQL API, a "breaking change" would include:
 - The C API (extension entrypoints)
 - "Bindings" like the official `pip` and `npm` packages
 - Release assets like the pre-compiled extensions
-## What counts as a "breaking change"?
+- Removing a function or module
 - Changing the number or types of arguments for an SQL function
 - Changing the required arguments or argument positions of a table function
 - Changing the `CREATE VIRTUAL TABLE` constructor of a virtual table in a backwards-incompatible way
 - Removing columns from a virtual table or table function
 ### Changes to SQL functions
- Re-naming or removing an SQL function
+The official "bindings" to `sqlite-vec`, including the Python/Node.js/Ruby/Go/Rust bindings, are subject to change and are not covered by semantic versioning.
- Changing the number of required SQL parameters
+That said, I have no plans to change or break them, and I would include notes in the changelog if that ever needs to happen.
 ### Changes to SQL virtual tables
 - The number of
 ### Changes to the C API
 Currently there is no "official" C API for `sqlite-vec`. However, there are
 entrypoints defined in C that C developers or developers using FFI can call. Any changes to these entrypoints would be a breaking change.
 ### Compile-time options
 The removal of any compile time options
 ## When is `v1.0` coming?
 In a few months! The main problems I want to solve before `v1.0` include:
 - Metadata columns
 - Metadata filtering
 - ANN indexing
 - Quantization + pre-transformations
 Once those items are complete, I will likely create a `v1.0` release, along with
 renaming the `vec0` virtual table module to `vec1`. And if future major releases
 are required, a `v2.0` major release will be made with new `vec2` virtual
 tables and so on.
 Ideally, only a `v1` major release would be required. But who knows what the
 future has in store with vector search!
 In general, I will try my best to maximize stability and limit the number of
 breaking changes for future `sqlite-vec` versions.
--- a/sqlite-vec.c
+++ b/sqlite-vec.c
@ -7038,7 +7038,6 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
      // clang-format off
    {"vec0",          &vec0Module,          NULL, NULL},
    {"vec_each",      &vec_eachModule,      NULL, NULL},
    {"vec_npy_each",  &vec_npy_eachModule,  NULL, NULL},
      // clang-format on
  };
@ -7066,7 +7065,7 @@ SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
 }
 #ifndef SQLITE_VEC_OMIT_FS
-SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
+SQLITE_VEC_API int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
                                            const sqlite3_api_routines *pApi) {
  UNUSED_PARAMETER(pzErrMsg);
 #ifndef SQLITE_CORE
@ -7075,6 +7074,10 @@ SQLITE_VEC_API int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
  int rc = SQLITE_OK;
  rc = sqlite3_create_function_v2(db, "vec_npy_file", 1, SQLITE_RESULT_SUBTYPE,
                                  NULL, vec_npy_file, NULL, NULL, NULL);
  if(rc != SQLITE_OK) {
    return rc;
  }
  rc = sqlite3_create_module_v2(db, "vec_npy_each", &vec_npy_eachModule, NULL, NULL);
  return rc;
 }
 #endif
--- a/sqlite-vec.h.tmpl
+++ b/sqlite-vec.h.tmpl
@ -25,19 +25,9 @@
 extern "C" {
 #endif
-#ifdef _WIN32
+SQLITE_VEC_API int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
 __declspec(dllexport)
 #endif
 int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg,
                  const sqlite3_api_routines *pApi);
 #ifdef _WIN32
 __declspec(dllexport)
 #endif
 int sqlite3_vec_fs_read_init(sqlite3 *db, char **pzErrMsg,
                          const sqlite3_api_routines *pApi);
 #ifdef __cplusplus
 }  /* end of the 'extern "C"' block */
 #endif
--- a/tests/test-loadable.py
+++ b/tests/test-loadable.py
@ -119,7 +119,6 @@ FUNCTIONS = [
 MODULES = [
    "vec0",
    "vec_each",
    "vec_npy_each",
    # "vec_static_blob_entries",
    # "vec_static_blobs",
 ]
@ -1619,6 +1618,7 @@ def to_npy(arr):
 def test_vec_npy_each():
    db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
    vec_npy_each = lambda *args: execute_all(
        db, "select rowid, * from vec_npy_each(?)", args
    )
@ -1651,6 +1651,7 @@ def test_vec_npy_each():
 def test_vec_npy_each_errors():
    db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
    vec_npy_each = lambda *args: execute_all(
        db, "select rowid, * from vec_npy_each(?)", args
    )
@ -1769,7 +1770,7 @@ import tempfile
 def test_vec_npy_each_errors_files():
-    db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_fs_read_init")
+    db = connect(EXT_PATH, extra_entrypoint="sqlite3_vec_numpy_init")
    def vec_npy_each(data):
        with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
@ -2274,36 +2275,42 @@ def test_smoke():
    db.execute("insert into vec_xyz(rowid, a) select 2, X'0000000000000040'")
    chunk = db.execute("select * from vec_xyz_chunks").fetchone()
-    assert chunk[
+    assert (
-        "rowids"
+        chunk["rowids"]
-    ] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + bytearray(
+        == b"\x01\x00\x00\x00\x00\x00\x00\x00"
-        int(1024 * 8) - 8 * 2
+        + b"\x02\x00\x00\x00\x00\x00\x00\x00"
        + bytearray(int(1024 * 8) - 8 * 2)
    )
    assert chunk["chunk_id"] == 1
    assert chunk["validity"] == b"\x03" + bytearray(int(1024 / 8) - 1)
    vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
    assert vchunk["rowid"] == 1
-    assert vchunk[
+    assert (
-        "vectors"
+        vchunk["vectors"]
-    ] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + bytearray(
+        == b"\x00\x00\x00\x00\x00\x00\x80\x3f"
-        int(1024 * 4 * 2) - (2 * 4 * 2)
+        + b"\x00\x00\x00\x00\x00\x00\x00\x40"
        + bytearray(int(1024 * 4 * 2) - (2 * 4 * 2))
    )
    db.execute("insert into vec_xyz(rowid, a) select 3, X'00000000000080bf'")
    chunk = db.execute("select * from vec_xyz_chunks").fetchone()
    assert chunk["chunk_id"] == 1
    assert chunk["validity"] == b"\x07" + bytearray(int(1024 / 8) - 1)
-    assert chunk[
+    assert (
-        "rowids"
+        chunk["rowids"]
-    ] == b"\x01\x00\x00\x00\x00\x00\x00\x00" + b"\x02\x00\x00\x00\x00\x00\x00\x00" + b"\x03\x00\x00\x00\x00\x00\x00\x00" + bytearray(
+        == b"\x01\x00\x00\x00\x00\x00\x00\x00"
-        int(1024 * 8) - 8 * 3
+        + b"\x02\x00\x00\x00\x00\x00\x00\x00"
        + b"\x03\x00\x00\x00\x00\x00\x00\x00"
        + bytearray(int(1024 * 8) - 8 * 3)
    )
    vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
    assert vchunk["rowid"] == 1
-    assert vchunk[
+    assert (
-        "vectors"
+        vchunk["vectors"]
-    ] == b"\x00\x00\x00\x00\x00\x00\x80\x3f" + b"\x00\x00\x00\x00\x00\x00\x00\x40" + b"\x00\x00\x00\x00\x00\x00\x80\xbf" + bytearray(
+        == b"\x00\x00\x00\x00\x00\x00\x80\x3f"
-        int(1024 * 4 * 2) - (2 * 4 * 3)
+        + b"\x00\x00\x00\x00\x00\x00\x00\x40"
        + b"\x00\x00\x00\x00\x00\x00\x80\xbf"
        + bytearray(int(1024 * 4 * 2) - (2 * 4 * 3))
    )
    # db.execute("select * from vec_xyz")
@ -2346,8 +2353,7 @@ def test_vec0_stress_small_chunks():
        {"rowid": 994, "a": _f32([99.4] * 8)},
        {"rowid": 993, "a": _f32([99.3] * 8)},
    ]
-    assert (
+    assert execute_all(
        execute_all(
        db,
        """
              select rowid, a, distance
@ -2357,8 +2363,7 @@ def test_vec0_stress_small_chunks():
              order by distance
            """,
        [_f32([50.0] * 8)],
-        )
+    ) == [
        == [
        {
            "a": _f32([500 * 0.1] * 8),
            "distance": 0.0,
@ -2405,7 +2410,6 @@ def test_vec0_stress_small_chunks():
            "rowid": 504,
        },
    ]
    )
 def test_vec0_distance_metric():