vec_type(), API references

2026-04-25 08:46:49 +02:00 · 2024-07-22 21:24:44 -07:00 · 2024-07-22 21:24:44 -07:00 · ff6cf96e2a
commit ff6cf96e2a
parent cfd8e9a46b
6 changed files with 677 additions and 240 deletions
--- a/reference.yaml
+++ b/reference.yaml
@ -1,33 +1,45 @@
 sections:
  meta:
    title: Meta
    desc: TODO
  constructors:
    title: Constructors
-    desc: TODO
+    desc: |
      SQL functions that "construct" vectors with different element types.
      Currently, only `float32`, `int8`, and `bit` vectors are supported.
  op:
    title: Operations
-    desc: TODO
+    desc: |
      Different operations and utilities for working with vectors.
  distance:
    title: Distance functions
-    desc: TODO
+    desc: Various algorithms to calculate distance between two vectors.
  quantization:
    title: Quantization
-    desc: TODO
+    desc: Various techniques to "compress" a vector by reducing precision and accuracy.
-functions:
+  numpy:
    title: "NumPy Utilities"
    desc: Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
  meta:
    title: Meta
    desc: Helper functions to debug `sqlite-vec` installations.
  entrypoints:
    title: Entrypoints
    desc: All the named entrypoints that load in different `sqlite-vec` functions and options.
 #  vec0:
 #    title: "vec0 Virtual Table"
 #    desc: TODO
 meta:
  vec_version:
    params: []
    section: meta
    desc: Returns a version string of the current `sqlite-vec` installation.
    example: select vec_version();
  vec_debug:
    params: []
    section: meta
    desc: Returns debugging information of the current `sqlite-vec` installation.
    example: select vec_debug();
 constructors:
  vec_f32:
    params: [vector]
    section: constructors
    desc: |
      Creates a float vector from a BLOB or JSON text. If a BLOB is provided,
      the length must be divisible by 4, as a float takes up 4 bytes of space each.
@ -42,7 +54,6 @@ functions:
      - select vec_f32(X'AA');
  vec_int8:
    params: [vector]
    section: constructors
    desc: |
      Creates an 8-bit integer vector from a BLOB or JSON text. If a BLOB is provided,
      the length must be divisible by 4, as a float takes up 4 bytes of space each.
@ -60,19 +71,18 @@ functions:
  vec_bit:
    params: [vector]
    section: constructors
    desc: |
      Creates a binary vector from a BLOB.
-      The returned value is a BLOB with 4 bytes per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
+      The returned value is a BLOB with 1 byte per 8 elements, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
      of `224`.
    example:
      - select vec_bit(X'F0');
      - select subtype(vec_bit(X'F0'));
      - select vec_to_json(vec_bit(X'F0'));
 op:
  vec_length:
    params: [vector]
    section: op
    desc: |
      Returns the number of elements in the given vector.
      The vector can be `JSON`, `BLOB`, or the result of a [constructor function](#constructors).
@ -84,9 +94,20 @@ functions:
      - select vec_length(vec_int8(X'AABBCCDD'));
      - select vec_length(vec_bit(X'AABBCCDD'));
      - select vec_length(X'CCDD');
  vec_type:
    params: [vector]
    desc: |
      Returns the name of the type of `vector` as text. One of `'float32'`, `'int8'`, or `'bit'`.
      This function will return an error if `vector` is invalid.
    example:
      - select vec_type('[.1, .2]');
      - select vec_type(X'AABBCCDD');
      - select vec_type(vec_int8(X'AABBCCDD'));
      - select vec_type(vec_bit(X'AABBCCDD'));
      - select vec_type(X'CCDD');
  vec_add:
    params: [a, b]
    section: op
    desc: |
      Adds every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
      must be of the same type and same length. Only `float32` and `int8` vectors are supported.
@ -119,7 +140,6 @@ functions:
      - select vec_add(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_sub:
    params: [a, b]
    section: op
    desc: |
      Subtracts every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
      must be of the same type and same length. Only `float32` and `int8` vectors are supported.
@ -152,7 +172,6 @@ functions:
      - select vec_sub(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_normalize:
    params: [vector]
    section: op
    desc: |
      Performs L2 normalization on the given vector. Only float32 vectors are currently supported.
@ -172,7 +191,6 @@ functions:
        );
  vec_slice:
    params: [vector, start, end]
    section: op
    desc: |
      Extract a subset of `vector` from the `start` element (inclusive) to the `end` element (exclusive). TODO check
@ -208,7 +226,6 @@ functions:
        );
  vec_to_json:
    params: [vector]
    section: op
    desc: |
      Represents a vector as JSON text. The input vector can be a vector BLOB or JSON text.
@ -219,10 +236,45 @@ functions:
      - select vec_to_json(vec_bit(X'AABBCCDD'));
      - select vec_to_json('[1,2,3,4]');
      - select vec_to_json('invalid');
  vec_each:
    params: [vector]
    desc: |
      A table function to iterate through every element in a vector. One row is returned per element in the vector.
      ```sql
      CREATE TABLE vec_each(
        rowid int,    -- The zero-based index of the element within the vector
        vector HIDDEN -- input parameter: A well-formed vector value
      )
      ```
      Returns an error if `vector` is not a valid vector.
    example:
      - select rowid, value from vec_each('[1,2,3,4]');
      - select rowid, value from vec_each(X'AABBCCDD00112233');
      - select rowid, value from vec_each(vec_int8(X'AABBCCDD'));
      - select rowid, value from vec_each(vec_bit(X'F0'));
 distance:
  vec_distance_L2:
    params: [a, b]
    desc: |
      Calculates the L2 Euclidean distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
      Returns an error under the following conditions:
      - `a` or `b` are invalid vectors
      - `a` or `b` do not share the same vector element types (ex float32 or int8)
      - `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
      - `a` or `b` do not have the same length.
    example:
      - select vec_distance_L2('[1, 1]', '[2, 2]');
      - select vec_distance_L2('[1, 1]', '[-2, -2]');
      - select vec_distance_L2('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
      - select vec_distance_L2(X'AABBCCDD', X'00112233');
      - select vec_distance_L2('[1, 1]', vec_int8('[2, 2]'));
      - select vec_distance_L2(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_distance_cosine:
    params: [a, b]
    section: distance
    desc: |
      Calculates the cosine distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
@ -236,9 +288,10 @@ functions:
      - select vec_distance_cosine('[1, 1]', '[-2, -2]');
      - select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
      - select vec_distance_cosine(X'AABBCCDD', X'00112233');
      - select vec_distance_cosine('[1, 1]', vec_int8('[2, 2]'));
      - select vec_distance_cosine(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_distance_hamming:
    params: [a, b]
    section: distance
    desc: |
      Calculates the hamming distance between two bitvectors `a` and `b`. Only valid for bitvectors.
@ -250,34 +303,85 @@ functions:
      - select vec_distance_hamming(vec_bit(X'00'), vec_bit(X'FF'));
      - select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
      - select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
-      - select vec_distance_hamming(X'F0', X'00');
+      - select vec_distance_hamming('[1, 1]', '[0, 0]');
-  vec_distance_l2:
+
-    params: [a, b]
+quantization:
-    section: distance
+  vec_quantize_binary:
    params: [vector]
    desc: |
      Quantize a float32 or int8 vector into a bitvector.
      For every element in the vector, a `1` is assigned to positive numbers and a `0` is assigned to zero and negative numbers.
      These values are then packed into a bit vector.
      Returns an error if `vector` is invalid, or if `vector` is not a float32 or int8 vector.
    example:
      - select vec_quantize_binary('[1, 2, 3, 4, 5, 6, 7, 8]');
      - select vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
      - select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
      - select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
      - select vec_quantize_binary(vec_int8(X'11223344'));
      - select vec_quantize_binary(vec_bit(X'FF'));
  vec_quantize_i8:
    params: [vector, "[start]", "[end]"]
    desc: x
    example: select 'todo';
-  vec_quantize_binary:
+numpy:
  vec_npy_each:
    params: [vector]
-    section: quantization
+    desc: |
-    desc: x
+      xxx
-    example: select 'todo';
+    example:
-  vec_quantize_i8:
+      - |
-    params: [vector, "[start]", "[end]"]
+        -- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
-    section: quantization
+        select
-    desc: x
+          rowid,
-    example: select 'todo';
+          vector,
          vec_type(vector),
          vec_to_json(vector)
        from vec_npy_each(
          X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
        )
      - |
        -- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
        select
          rowid,
          vector,
          vec_type(vector),
          vec_to_json(vector)
        from vec_npy_each(
          X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
        )
 vec0:
  vec0:
    params: []
    desc: TODO
    example:
      - |
        create virtual table vec_items using vec0(
          contents_embedding float[4]
        );
      - |
        insert into vec_items(rowid, contents_embedding)
        values (1, '[1, 1, 1, 1]'),
          (2, '[2, 2, 2, 2]'),
          (3, '[3, 3, 3, 3]');
 entrypoints:
  {}
  #sqlite3_vec_init:
  #  desc: |
  #    asdf
  #sqlite3_vec_fs_read_init:
  #  desc: |
  #    asdf
 #table_functions:
 #  vec_each:
 #    columns: [rowid, value]
 #    inputs: ["vector"]
 #    desc:
 #    example:
 #  vec_npy_each:
 #    columns: [rowid, vector]
 #    inputs: ["input"]
 #    desc:
 #    example:
 #virtual_tables:
 #  vec0:
 #    desc:
--- a/site/api-reference.md
+++ b/site/api-reference.md
@ -1,45 +1,23 @@
 ---
 outline: 2
 ---
 # API Reference
 A complete reference to all the SQL scalar functions, table functions, and virtual tables inside `sqlite-vec`.
 ::: warning
 sqlite-vec is pre-v1, so expect breaking changes.
 :::
 [[toc]]
 ## Meta {#meta} 
 TODO
 ### `vec_version()` {#vec_version}
 Returns a version string of the current `sqlite-vec` installation.
 ```sql
 select vec_version();
 -- 'v0.0.1-alpha.36'
 ```
 ### `vec_debug()` {#vec_debug}
 Returns debugging information of the current `sqlite-vec` installation.
 ```sql
 select vec_debug();
 /*
 'Version: v0.0.1-alpha.36
 Date: 2024-07-16T23:06:41Z-0700
 Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d
 Build flags: avx '
 */
 ```
 ## Constructors {#constructors} 
-TODO
+SQL functions that "construct" vectors with different element types.
 Currently, only `float32`, `int8`, and `bit` vectors are supported.
 ### `vec_f32(vector)` {#vec_f32}
@ -52,7 +30,7 @@ of `223`.
 ```sql
 select vec_f32('[.1, .2, .3, 4]');
-- X'CDCCCC3DCDCC4C3E9A99993E008040'
+-- X'CDCCCC3DCDCC4C3E9A99993E00008040'
 select subtype(vec_f32('[.1, .2, .3, 4]'));
 -- 223
@ -81,7 +59,7 @@ of `225`.
 ```sql
 select vec_int8('[1, 2, 3, 4]');
-- X'1234'
+-- X'01020304'
 select subtype(vec_int8('[1, 2, 3, 4]'));
 -- 225
@ -102,7 +80,7 @@ select vec_int8('[999]');
 Creates a binary vector from a BLOB.
-The returned value is a BLOB with 4 bytes per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
+The returned value is a BLOB with 1 byte per 8 elements, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
 of `224`.
@ -121,7 +99,8 @@ select vec_to_json(vec_bit(X'F0'));
 ## Operations {#op} 
-TODO
+Different operations and utilities for working with vectors.
 ### `vec_length(vector)` {#vec_length}
@ -148,6 +127,32 @@ select vec_length(X'CCDD');
 -- ❌ invalid float32 vector BLOB length. Must be divisible by 4, found 2
 ```
 ### `vec_type(vector)` {#vec_type}
 Returns the name of the type of `vector` as text. One of `'float32'`, `'int8'`, or `'bit'`.
 This function will return an error if `vector` is invalid.
 ```sql
 select vec_type('[.1, .2]');
 -- 'float32'
 select vec_type(X'AABBCCDD');
 -- 'float32'
 select vec_type(vec_int8(X'AABBCCDD'));
 -- 'int8'
 select vec_type(vec_bit(X'AABBCCDD'));
 -- 'bit'
 select vec_type(X'CCDD');
 -- ❌ invalid float32 vector BLOB length. Must be divisible by 4, found 2
 ```
 ### `vec_add(a, b)` {#vec_add}
@ -165,7 +170,7 @@ select vec_add(
  '[.1, .2, .3]',
  '[.4, .5, .6]'
 );
-- X'0003F3333333F6766663F'
+-- X'0000003F3333333F6766663F'
 select vec_to_json(
  vec_add(
@ -243,7 +248,7 @@ Returns an error if the input is an invalid vector or not a float32 vector.
 ```sql
 select vec_normalize('[2, 3, 1, -4]');
-- X'BAF4BA3E8B37C3FBAF43A3EBAF43ABF'
+-- X'BAF4BA3E8B370C3FBAF43A3EBAF43ABF'
 select vec_to_json(
  vec_normalize('[2, 3, 1, -4]')
@ -277,7 +282,7 @@ Returns an error in the following conditions:
 ```sql
 select vec_slice('[1, 2,3, 4]', 0, 2);
-- X'00803F00040'
+-- X'0000803F00000040'
 select vec_to_json(
  vec_slice('[1, 2,3, 4]', 0, 2)
@ -331,11 +336,134 @@ select vec_to_json('invalid');
 -- ❌ JSON array parsing error: Input does not start with '['
 ```
 ### `vec_each(vector)` {#vec_each}
 A table function to iterate through every element in a vector. One row is returned per element in the vector.
 ```sql
 CREATE TABLE vec_each(
  rowid int,    -- The zero-based index of the element within the vector
  vector HIDDEN -- input parameter: A well-formed vector value
 )
 ```
 Returns an error if `vector` is not a valid vector.
 ```sql
 select rowid, value from vec_each('[1,2,3,4]');
 /*
 ┌───────┬───────┐
 │ rowid │ value │
 ├───────┼───────┤
 │ 0     │ 1     │
 ├───────┼───────┤
 │ 1     │ 2     │
 ├───────┼───────┤
 │ 2     │ 3     │
 ├───────┼───────┤
 │ 3     │ 4     │
 └───────┴───────┘
 */
 select rowid, value from vec_each(X'AABBCCDD00112233');
 /*
 ┌───────┬──────────────────────┐
 │ rowid │ value                │
 ├───────┼──────────────────────┤
 │ 0     │ -1844071490169864200 │
 ├───────┼──────────────────────┤
 │ 1     │ 3.773402568185702e-8 │
 └───────┴──────────────────────┘
 */
 select rowid, value from vec_each(vec_int8(X'AABBCCDD'));
 /*
 ┌───────┬───────┐
 │ rowid │ value │
 ├───────┼───────┤
 │ 0     │ -86   │
 ├───────┼───────┤
 │ 1     │ -69   │
 ├───────┼───────┤
 │ 2     │ -52   │
 ├───────┼───────┤
 │ 3     │ -35   │
 └───────┴───────┘
 */
 select rowid, value from vec_each(vec_bit(X'F0'));
 /*
 ┌───────┬───────┐
 │ rowid │ value │
 ├───────┼───────┤
 │ 0     │ 1     │
 ├───────┼───────┤
 │ 1     │ 1     │
 ├───────┼───────┤
 │ 2     │ 1     │
 ├───────┼───────┤
 │ 3     │ 1     │
 ├───────┼───────┤
 │ 4     │ 0     │
 ├───────┼───────┤
 │ 5     │ 0     │
 ├───────┼───────┤
 │ 6     │ 0     │
 ├───────┼───────┤
 │ 7     │ 0     │
 └───────┴───────┘
 */
 ```
 ## Distance functions {#distance} 
-TODO
+Various algorithms to calculate distance between two vectors.
 ### `vec_distance_L2(a, b)` {#vec_distance_L2}
 Calculates the L2 Euclidean distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
 Returns an error under the following conditions:
 - `a` or `b` are invalid vectors
 - `a` or `b` do not share the same vector element types (ex float32 or int8)
 - `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
 - `a` or `b` do not have the same length.
 ```sql
 select vec_distance_L2('[1, 1]', '[2, 2]');
 -- 1.4142135381698608
 select vec_distance_L2('[1, 1]', '[-2, -2]');
 -- 4.242640495300293
 select vec_distance_L2('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
 -- 5.7157673835754395
 select vec_distance_L2(X'AABBCCDD', X'00112233');
 -- 1844071490169864200
 select vec_distance_L2('[1, 1]', vec_int8('[2, 2]'));
 -- ❌ Vector type mistmatch. First vector has type float32, while the second has type int8.
 select vec_distance_L2(vec_bit(X'AA'), vec_bit(X'BB'));
 -- ❌ Cannot calculate L2 distance between two bitvectors.
 ```
 ### `vec_distance_cosine(a, b)` {#vec_distance_cosine}
@ -361,6 +489,12 @@ select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
 select vec_distance_cosine(X'AABBCCDD', X'00112233');
 -- 2
 select vec_distance_cosine('[1, 1]', vec_int8('[2, 2]'));
 -- ❌ Vector type mistmatch. First vector has type float32, while the second has type int8.
 select vec_distance_cosine(vec_bit(X'AA'), vec_bit(X'BB'));
 -- ❌ Cannot calculate cosine distance between two bitvectors.
 ```
@ -384,34 +518,43 @@ select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
 select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
 -- 4
-select vec_distance_hamming(X'F0', X'00');
+select vec_distance_hamming('[1, 1]', '[0, 0]');
-- ❌ Error reading 1st vector: invalid float32 vector BLOB length. Must be divisible by 4, found 1
+-- ❌ Cannot calculate hamming distance between two float32 vectors.
 ```
 ### `vec_distance_l2(a, b)` {#vec_distance_l2}
 x
 ```sql
 select 'todo';
 -- 'todo'
 ```
 ## Quantization {#quantization} 
-TODO
+Various techniques to "compress" a vector by reducing precision and accuracy.
 ### `vec_quantize_binary(vector)` {#vec_quantize_binary}
-x
+Quantize a float32 or int8 vector into a bitvector.
 For every element in the vector, a `1` is assigned to positive numbers and a `0` is assigned to zero and negative numbers.
 These values are then packed into a bit vector.
 Returns an error if `vector` is invalid, or if `vector` is not a float32 or int8 vector.
 ```sql
-select 'todo';
+select vec_quantize_binary('[1, 2, 3, 4, 5, 6, 7, 8]');
-- 'todo'
+-- X'FF'
 select vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
 -- X'0F'
 select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
 -- X'00'
 select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
 -- X'00'
 select vec_quantize_binary(vec_int8(X'11223344'));
 -- ❌ Binary quantization requires vectors with a length divisible by 8
 select vec_quantize_binary(vec_bit(X'FF'));
 -- ❌ Can only binary quantize float or int8 vectors
 ```
@ -427,3 +570,97 @@ select 'todo';
 ```
 ## NumPy Utilities {#numpy} 
 Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
 ### `vec_npy_each(vector)` {#vec_npy_each}
 xxx
 ```sql
 -- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
 select
  rowid,
  vector,
  vec_type(vector),
  vec_to_json(vector)
 from vec_npy_each(
  X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
 )
 /*
 ┌───────┬─────────────┬──────────────────┬─────────────────────┐
 │ rowid │ vector      │ vec_type(vector) │ vec_to_json(vector) │
 ├───────┼─────────────┼──────────────────┼─────────────────────┤
 │ 0     │ X'0000803F' │ 'float32'        │ '[1.000000]'        │
 ├───────┼─────────────┼──────────────────┼─────────────────────┤
 │ 1     │ X'00000040' │ 'float32'        │ '[2.000000]'        │
 ├───────┼─────────────┼──────────────────┼─────────────────────┤
 │ 2     │ X'00004040' │ 'float32'        │ '[3.000000]'        │
 └───────┴─────────────┴──────────────────┴─────────────────────┘
 */
 -- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
 select
  rowid,
  vector,
  vec_type(vector),
  vec_to_json(vector)
 from vec_npy_each(
  X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
 )
 /*
 ┌───────┬─────────────┬──────────────────┬─────────────────────┐
 │ rowid │ vector      │ vec_type(vector) │ vec_to_json(vector) │
 ├───────┼─────────────┼──────────────────┼─────────────────────┤
 │ 0     │ X'0000803F' │ 'float32'        │ '[1.000000]'        │
 ├───────┼─────────────┼──────────────────┼─────────────────────┤
 │ 1     │ X'00000040' │ 'float32'        │ '[2.000000]'        │
 ├───────┼─────────────┼──────────────────┼─────────────────────┤
 │ 2     │ X'00004040' │ 'float32'        │ '[3.000000]'        │
 └───────┴─────────────┴──────────────────┴─────────────────────┘
 */
 ```
 ## Meta {#meta} 
 Helper functions to debug `sqlite-vec` installations.
 ### `vec_version()` {#vec_version}
 Returns a version string of the current `sqlite-vec` installation.
 ```sql
 select vec_version();
 -- 'v0.0.1-alpha.36'
 ```
 ### `vec_debug()` {#vec_debug}
 Returns debugging information of the current `sqlite-vec` installation.
 ```sql
 select vec_debug();
 /*
 'Version: v0.0.1-alpha.36
 Date: 2024-07-16T23:06:41Z-0700
 Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d
 Build flags: avx '
 */
 ```
 ## Entrypoints {#entrypoints} 
 All the named entrypoints that load in different `sqlite-vec` functions and options.
--- a/site/build-ref.mjs
+++ b/site/build-ref.mjs
@ -6,6 +6,22 @@ import { readFileSync, writeFileSync } from "node:fs";
 import * as v from "valibot";
 import { table } from "table";
 const HEADER = `---
 outline: 2
 ---
 # API Reference
 A complete reference to all the SQL scalar functions, table functions, and virtual tables inside \`sqlite-vec\`.
 ::: warning
 sqlite-vec is pre-v1, so expect breaking changes.
 :::
 [[toc]]
 `;
 const REF_PATH = resolve(
  dirname(fileURLToPath(import.meta.url)),
  "../reference.yaml"
@ -15,32 +31,25 @@ const EXT_PATH = resolve(
  "../dist/vec0"
 );
-const DocSchema = v.object({
+const DocSchema = v.objectWithRest(
-  sections: v.record(
+  {
-    v.string(),
+    sections: v.record(
-    v.object({
+      v.string(),
-      title: v.string(),
+      v.object({
-      desc: v.string(),
+        title: v.string(),
-    })
+        desc: v.string(),
-  ),
+      })
-  functions: v.record(
+    ),
-    v.string(),
+  },
-    v.object({
+  v.record(
      params: v.array(v.string()),
      desc: v.string(),
      section: v.string(),
      example: v.union([v.string(), v.array(v.string())]),
    })
  ),
  /*table_functions: v.record(
    v.string(),
    v.object({
      params: v.array(v.string()),
      desc: v.string(),
      example: v.union([v.string(), v.array(v.string())]),
    })
-  ),*/
+  )
-});
+);
 const tableConfig = {
  border: {
@ -78,7 +87,7 @@ function formatSingleValue(value) {
  if (value instanceof Uint8Array) {
    let s = "X'";
    for (const v of value) {
-      s += v.toString(16).toUpperCase();
+      s += v.toString(16).toUpperCase().padStart(2, "0");
    }
    s += "'";
    return `-- ${s}`;
@ -87,12 +96,13 @@ function formatSingleValue(value) {
    return "-- " + JSON.stringify(value, null, 2);
 }
 function formatValue(value) {
-  if (typeof value === "string" || typeof value === "number") return value;
+  if (typeof value === "string") return `'${value}'`;
  if (typeof value === "number") return value;
  if (value === null) return "NULL";
  if (value instanceof Uint8Array) {
    let s = "X'";
    for (const v of value) {
-      s += v.toString(16);
+      s += v.toString(16).toUpperCase().padStart(2, "0");
    }
    s += "'";
    return s;
@ -125,7 +135,11 @@ function renderExamples(db, name, example) {
    results = null;
    try {
      stmt = db.prepare(sql);
-      stmt.raw(true);
+      try {
        stmt.raw(true);
      } catch (err) {
        1;
      }
    } catch (error) {
      console.error(`Error preparing statement for ${name}:`);
      console.error(error);
@ -157,37 +171,27 @@ function renderExamples(db, name, example) {
  return md;
 }
-let md = `# API Reference
+let md = HEADER;
 ::: warning
 sqlite-vec is pre-v1, so expect breaking changes.
 :::
 [[toc]]
 `;
 const doc = v.parse(DocSchema, load(readFileSync(REF_PATH, "utf8")));
 const db = new Database();
 db.loadExtension(EXT_PATH);
-let lastSection = null;
+for (const section in doc.sections) {
-for (const [name, { params, desc, example, section }] of Object.entries(
+  md += `## ${doc.sections[section].title} {#${section}} \n\n`;
-  doc.functions
+  md += doc.sections[section].desc;
-)) {
+  md += "\n\n";
  const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`;
-  if (lastSection != section) {
+  for (const [name, { params, desc, example }] of Object.entries(
-    md += `## ${doc.sections[section].title} {#${section}} \n\n`;
+    doc[section]
-    md += doc.sections[section].desc;
+  )) {
-    md += "\n\n";
+    const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`;
-    lastSection = section;
+
    md += "### " + headerText + "\n\n";
    md += desc + "\n\n";
    md += renderExamples(db, name, example);
  }
  md += "### " + headerText + "\n\n";
  md += desc + "\n\n";
  md += renderExamples(db, name, example);
 }
 writeFileSync("api-reference.md", md, "utf8");
--- a/site/versioning.md
+++ b/site/versioning.md
@ -0,0 +1,49 @@
 # Semantic Versioning for `sqlite-vec`
 `sqlite-vec` is pre-v1, so according to the rules of [Semantic Versioning](https://semver.org/),
 "minor" releases like "0.2.0" or "0.3.0" may contain breaking changes.
 But what exactly counts as a "breaking change" in a SQLite extension? The line isn't so clear, unfortunately.
 Here are all the surfaces that COULD count as a "breaking change":
 - SQL functions and columns on virtual tables
 - The C API (extension entrypoints)
 - "Bindings" like the official `pip` and `npm` packages
 - Release assets like the pre-compiled extensions
 ## What counts as a "breaking change"?
 ### Changes to SQL functions
 - Re-naming or removing an SQL function
 - Changing the number of required SQL parameters
 ### Changes to SQL virtual tables
 - The number of
 ### Changes to the C API
 Currently there is no "official" C API for `sqlite-vec`. However, there are entrypoints defined in C that C developers or developers using FFI can call. Any
 ### Compile-time options
 The removal of any compile time options
 ## When is `v1.0` coming?
 In a few months! The main problems I want to solve before `v1.0` include:
 - Metadata columns
 - Metadata filtering
 - ANN indexing
 - Quantization + pre-transformations
 Once those items are complete, I will likely create a `v1.0` release, along with renaming the `vec0` virtual table module to `vec1`. And if future major releases are required, a `v2.0` major release will be made with new `vec2` virtual tables and so on.
 Ideally, only a `v1` major release would be required. But who knows what the future has in store with vector search!
 In general, I will try my best to maximize stability and limit the number of breaking changes for future `sqlite-vec` versions.
--- a/sqlite-vec.c
+++ b/sqlite-vec.c
@ -1082,8 +1082,105 @@ finish:
  return;
 }
-static void vec_quantize_i8(sqlite3_context *context, int argc,
+char * vec_type_name(enum VectorElementType elementType) {
  /* Maps a vector element type to the static name that vec_type() reports:
   * "float32", "int8" or "bit". The returned string must not be modified
   * or freed by the caller. */
  switch(elementType) {
    case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
      return "float32";
    case SQLITE_VEC_ELEMENT_TYPE_INT8:
      return "int8";
    case SQLITE_VEC_ELEMENT_TYPE_BIT:
      return "bit";
  }
  /* Only reached if elementType holds a value not handled above. Returning
   * explicitly avoids falling off the end of a non-void function, which is
   * undefined behaviour once the caller uses the result. */
  return "";
 }
 /*
  * SQL scalar function vec_type(vector).
  * Parses argv[0] with vector_from_value() and sets the result to the name
  * of the vector's element type as returned by vec_type_name(). If parsing
  * fails, the error message produced by vector_from_value() becomes the
  * SQL error.
  */
 static void vec_type(sqlite3_context *context, int argc,
                      sqlite3_value **argv) {
  void *vec;
  size_t nDims;
  vector_cleanup freeVec;
  char *zErr;
  enum VectorElementType type;
  assert(argc == 1);
  if (vector_from_value(argv[0], &vec, &nDims, &type, &freeVec, &zErr) ==
      SQLITE_OK) {
    /* The type name is a string literal, so SQLITE_STATIC is sufficient. */
    sqlite3_result_text(context, vec_type_name(type), -1, SQLITE_STATIC);
    freeVec(vec);
  } else {
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
  }
 }
 /*
  * SQL scalar function vec_quantize_binary(vector).
  * Quantizes a float32 or int8 vector into a bit vector: element i sets bit
  * (i % 8) of output byte (i / 8) when the element is strictly greater than
  * zero; all other bits stay 0. The result is a BLOB of dimensions/CHAR_BIT
  * bytes tagged with subtype SQLITE_VEC_ELEMENT_TYPE_BIT.
  *
  * Raises an SQL error when:
  *  - argv[0] cannot be parsed by vector_from_value(),
  *  - the vector has zero elements,
  *  - the element count is not divisible by CHAR_BIT,
  *  - the output buffer cannot be allocated (SQLITE_NOMEM),
  *  - the input is already a bit vector.
  * Every exit after a successful parse goes through the cleanup label so
  * the parsed input vector is always released.
  */
 static void vec_quantize_binary(sqlite3_context *context, int argc,
                                 sqlite3_value **argv) {
  assert(argc == 1);
  void *vector;
  size_t dimensions;
  vector_cleanup vectorCleanup;
  char *pzError;
  enum VectorElementType elementType;
  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &vectorCleanup, &pzError);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, pzError, -1);
    sqlite3_free(pzError);
    return;
  }
  /* dimensions is unsigned, so "no elements" is exactly zero. */
  if(dimensions == 0) {
    sqlite3_result_error(context, "Zero length vectors are not supported.", -1);
    goto cleanup;
  }
  if((dimensions % CHAR_BIT) != 0) {
    sqlite3_result_error(context, "Binary quantization requires vectors with a length divisible by 8", -1);
    goto cleanup;
  }
  int sz = dimensions / CHAR_BIT;
  u8 *out = sqlite3_malloc(sz);
  if (!out) {
    sqlite3_result_error_code(context, SQLITE_NOMEM);
    goto cleanup;
  }
  /* Start from all-zero bits; the loops below only ever set bits. */
  memset(out, 0, sz);
  switch(elementType) {
    case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
      for (size_t i = 0; i < dimensions; i++) {
        int res = ((f32 *)vector)[i] > 0.0;
        out[i / 8] |= (res << (i % 8));
      }
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8: {
      for (size_t i = 0; i < dimensions; i++) {
        int res = ((i8 *)vector)[i] > 0;
        out[i / 8] |= (res << (i % 8));
      }
      break;
    }
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
      sqlite3_result_error(context, "Can only binary quantize float or int8 vectors", -1);
      sqlite3_free(out);
      /* Previously returned here directly, skipping vectorCleanup(). */
      goto cleanup;
    }
  }
  /* Ownership of out passes to SQLite, which frees it with sqlite3_free. */
  sqlite3_result_blob(context, out, sz, sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
  cleanup:
    vectorCleanup(vector);
 }
 static void vec_quantize_int8(sqlite3_context *context, int argc,
                            sqlite3_value **argv) {
  assert(argc == 2);
  f32 *srcVector;
  size_t dimensions;
  fvec_cleanup srcCleanup;
@ -1099,39 +1196,23 @@ static void vec_quantize_i8(sqlite3_context *context, int argc,
  int sz = dimensions * sizeof(i8);
  out = sqlite3_malloc(sz);
  if (!out) {
-    rc = SQLITE_NOMEM;
+    sqlite3_result_error_nomem(context);
    goto cleanup;
  }
  memset(out, 0, sz);
-  if (argc == 2) {
+  if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
-    if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
+      (sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
-        (sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
+      (sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
-        (sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
+        0)) {
-         0)) {
+    sqlite3_result_error(context, "2nd argument to vec_quantize_i8() must be 'unit'.", -1);
      sqlite3_result_error(context,
                           "2nd argument to vec_quantize_i8() must be 'unit', "
                           "or ranges must be provided.",
                           -1);
      sqlite3_free(out);
      goto cleanup;
    }
    f32 step = (1.0 - (-1.0)) / 255;
    for (size_t i = 0; i < dimensions; i++) {
      out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
    }
  } else if (argc == 3) {
    // f32 * minVector, maxVector;
    // size_t d;
    // fvec_cleanup minCleanup, maxCleanup;
    // int rc = fvec_from_value(argv[1], )
    sqlite3_free(out);
    // TODO
    sqlite3_result_error(
        context, "ranges parameter not supported in vec_quantize_i8 yet.", -1);
    goto cleanup;
  }
  f32 step = (1.0 - (-1.0)) / 255;
  for (size_t i = 0; i < dimensions; i++) {
    out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
  }
  sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
@ -1140,58 +1221,6 @@ cleanup:
  srcCleanup(srcVector);
 }
 // SQL scalar function: vec_quantize_binary(vector)
 //
 // Converts a float32 or int8 vector into a bit vector. Output bit `i` is set
 // when element `i` is strictly greater than zero. Bits are packed
 // least-significant-bit first, CHAR_BIT elements per output byte. The result
 // is a BLOB tagged with the SQLITE_VEC_ELEMENT_TYPE_BIT subtype.
 //
 // Errors reported through `context`:
 //   - whatever message vector_from_value() produces when argv[0] is rejected
 //   - the element type is neither float32 nor int8
 //   - the vector is empty or its length is not a multiple of CHAR_BIT (the
 //     output buffer holds exactly dimensions / CHAR_BIT bytes, so any other
 //     length would write past the end of it)
 //   - SQLITE_NOMEM when the output buffer cannot be allocated
 //
 // The input vector is released with its cleanup callback on every path once
 // vector_from_value() has succeeded.
 static void vec_quantize_binary(sqlite3_context *context, int argc,
                                 sqlite3_value **argv) {
  assert(argc == 1);
  void *vector;
  size_t dimensions;
  vector_cleanup cleanup;
  char *pzError;
  enum VectorElementType elementType;
  u8 *out;
  int sz;
  int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
                             &cleanup, &pzError);
  if (rc != SQLITE_OK) {
    sqlite3_result_error(context, pzError, -1);
    sqlite3_free(pzError);
    return;
  }

  if (elementType != SQLITE_VEC_ELEMENT_TYPE_FLOAT32 &&
      elementType != SQLITE_VEC_ELEMENT_TYPE_INT8) {
    sqlite3_result_error(context,
                         "Can only binary quantize float or int8 vectors", -1);
    goto done;
  }

  // Reject lengths that do not fill whole bytes: the loops below index
  // out[i / CHAR_BIT] for every i < dimensions, which only stays in bounds
  // when dimensions is a multiple of CHAR_BIT. A zero length is rejected here
  // too, rather than surfacing as a misleading out-of-memory error from a
  // zero-byte allocation.
  if (dimensions == 0 || (dimensions % CHAR_BIT) != 0) {
    sqlite3_result_error(context,
                         "Binary quantization requires a non-empty vector "
                         "with a length divisible by 8",
                         -1);
    goto done;
  }

  sz = dimensions / CHAR_BIT;
  out = sqlite3_malloc(sz);
  if (!out) {
    sqlite3_result_error_code(context, SQLITE_NOMEM);
    goto done;
  }
  memset(out, 0, sz);

  if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
    const f32 *src = (const f32 *)vector;
    for (size_t i = 0; i < dimensions; i++) {
      int res = src[i] > 0.0;
      out[i / CHAR_BIT] |= (res << (i % CHAR_BIT));
    }
  } else {
    const i8 *src = (const i8 *)vector;
    for (size_t i = 0; i < dimensions; i++) {
      int res = src[i] > 0;
      out[i / CHAR_BIT] |= (res << (i % CHAR_BIT));
    }
  }

  // SQLite takes ownership of `out` and frees it with sqlite3_free.
  sqlite3_result_blob(context, out, sz, sqlite3_free);
  sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);

 done:
  cleanup(vector);
 }
 static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
  assert(argc == 2);
@ -2778,7 +2807,7 @@ static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur,
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8:
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
-      // TODO
+      // https://github.com/asg017/sqlite-vec/issues/42
      sqlite3_result_error(context,
                           "vec_npy_each only supports float32 vectors", -1);
      break;
@ -2806,7 +2835,7 @@ static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur,
    }
    case SQLITE_VEC_ELEMENT_TYPE_INT8:
    case SQLITE_VEC_ELEMENT_TYPE_BIT: {
-      // TODO
+      // https://github.com/asg017/sqlite-vec/issues/42
      sqlite3_result_error(context,
                           "vec_npy_each only supports float32 vectors", -1);
      break;
@ -5902,13 +5931,13 @@ static sqlite3_module vec0Module = {
    /* xCommit       */ 0,
    /* xRollback     */ 0,
    /* xFindFunction */ 0,
-    /* xRename       */ 0, // TODO
+    /* xRename       */ 0, // https://github.com/asg017/sqlite-vec/issues/43
    /* xSavepoint    */ 0,
    /* xRelease      */ 0,
    /* xRollbackTo   */ 0,
    /* xShadowName   */ vec0ShadowName,
 #if SQLITE_VERSION_NUMBER >= 3044000
-    /* xIntegrity    */ 0, // TODO
+    /* xIntegrity    */ 0, // https://github.com/asg017/sqlite-vec/issues/44
 #endif
 };
 #pragma endregion
@ -6661,6 +6690,7 @@ __declspec(dllexport)
    {"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE,                         },
    {"vec_distance_cosine", vec_distance_cosine,  2, DEFAULT_FLAGS | SQLITE_SUBTYPE,                         },
    {"vec_length",          vec_length,           1, DEFAULT_FLAGS | SQLITE_SUBTYPE,                         },
    {"vec_type",           vec_type,           1, DEFAULT_FLAGS,                         },
    {"vec_to_json",         vec_to_json,          1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_add",             vec_add,              2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_sub",             vec_sub,              2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
@ -6669,8 +6699,7 @@ __declspec(dllexport)
    {"vec_f32",             vec_f32,              1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_bit",             vec_bit,              1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_int8",            vec_int8,             1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
-    {"vec_quantize_i8",     vec_quantize_i8,      2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
+    {"vec_quantize_int8",     vec_quantize_int8,      2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_quantize_i8",     vec_quantize_i8,      3, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_quantize_binary", vec_quantize_binary,  1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
    {"vec_static_blob_from_raw", vec_static_blob_from_raw,  4, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE },
      // clang-format on
--- a/tests/test-loadable.py
+++ b/tests/test-loadable.py
@ -110,12 +110,12 @@ FUNCTIONS = [
    "vec_length",
    "vec_normalize",
    "vec_quantize_binary",
-    "vec_quantize_i8",
+    "vec_quantize_int8",
    "vec_quantize_i8",
    "vec_slice",
    "vec_static_blob_from_raw",
    "vec_sub",
    "vec_to_json",
    "vec_type",
    "vec_version",
 ]
 MODULES = [
@ -448,6 +448,20 @@ def test_vec_slice():
        vec_slice(b"\xab\xab\xab\xab", 0, 0)
 def test_vec_type():
    """vec_type() reports the element type name of its argument.

    Covers bare JSON text and a bare 4-byte BLOB (both "float32"), values
    wrapped in the vec_f32 / vec_int8 / vec_bit constructors, and the error
    messages raised for a 1-byte BLOB and for NULL.
    """

    def vec_type(*params, a="?"):
        # `a` is the SQL expression placed inside vec_type(...); `params` are
        # bound to its "?" placeholders.
        cursor = db.execute(f"select vec_type({a})", params)
        return cursor.fetchone()[0]

    typed_inputs = (
        ('[1]', "?", "float32"),
        (b"\xaa\xbb\xcc\xdd", "?", "float32"),
        ('[1]', 'vec_f32(?)', "float32"),
        ('[1]', 'vec_int8(?)', "int8"),
        (b"\xaa", 'vec_bit(?)', "bit"),
    )
    for value, sql_expr, type_name in typed_inputs:
        assert vec_type(value, a=sql_expr) == type_name

    with _raises("invalid float32 vector"):
        vec_type(b"\xaa")

    with _raises("found NULL"):
        vec_type(None)
 def test_vec_add():
    vec_add = lambda *args, a="?", b="?": db.execute(
        f"select vec_add({a}, {b})", args
@ -517,11 +531,11 @@ def test_vec_to_json():
@pytest.mark.skip(reason="TODO")
-def test_vec_quantize_i8():
+def test_vec_quantize_int8():
-    vec_quantize_i8 = lambda *args: db.execute(
+    vec_quantize_int8 = lambda *args: db.execute(
-        "select vec_quantize_i8()", args
+        "select vec_quantize_int8()", args
    ).fetchone()[0]
-    assert vec_quantize_i8() == 111
+    assert vec_quantize_int8() == 111
 def test_vec_quantize_binary():
@ -1020,9 +1034,9 @@ def test_vec0_updates():
    db.execute(
        """
               INSERT INTO t3 VALUES
-                (1, :x, vec_quantize_i8(:x, 'unit') ,vec_quantize_binary(:x)),
+                (1, :x, vec_quantize_int8(:x, 'unit') ,vec_quantize_binary(:x)),
-                (2, :y, vec_quantize_i8(:y, 'unit') ,vec_quantize_binary(:y)),
+                (2, :y, vec_quantize_int8(:y, 'unit') ,vec_quantize_binary(:y)),
-                (3, :z, vec_quantize_i8(:z, 'unit') ,vec_quantize_binary(:z));
+                (3, :z, vec_quantize_int8(:z, 'unit') ,vec_quantize_binary(:z));
        """,
        {
            "x": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]",
@ -1795,7 +1809,7 @@ def test_vec0_knn():
    db.executemany(
        """
               INSERT INTO v VALUES
-                (:id, :vector, vec_quantize_i8(:vector, 'unit') ,vec_quantize_binary(:vector));
+                (:id, :vector, vec_quantize_int8(:vector, 'unit') ,vec_quantize_binary(:vector));
        """,
        [
            {