mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
vec_type(), API references
This commit is contained in:
parent
cfd8e9a46b
commit
ff6cf96e2a
6 changed files with 677 additions and 240 deletions
184
reference.yaml
184
reference.yaml
|
|
@ -1,33 +1,45 @@
|
||||||
sections:
|
sections:
|
||||||
meta:
|
|
||||||
title: Meta
|
|
||||||
desc: TODO
|
|
||||||
constructors:
|
constructors:
|
||||||
title: Constructors
|
title: Constructors
|
||||||
desc: TODO
|
desc: |
|
||||||
|
SQL functions that "construct" vectors with different element types.
|
||||||
|
|
||||||
|
Currently, only `float32`, `int8`, and `bit` vectors are supported.
|
||||||
|
|
||||||
op:
|
op:
|
||||||
title: Operations
|
title: Operations
|
||||||
desc: TODO
|
desc: |
|
||||||
|
Different operations and utilities for working with vectors.
|
||||||
distance:
|
distance:
|
||||||
title: Distance functions
|
title: Distance functions
|
||||||
desc: TODO
|
desc: Various algorithms to calculate distance between two vectors.
|
||||||
quantization:
|
quantization:
|
||||||
title: Quantization
|
title: Quantization
|
||||||
desc: TODO
|
desc: Various techniques to "compress" a vector by reducing precision and accuracy.
|
||||||
functions:
|
numpy:
|
||||||
|
title: "NumPy Utilities"
|
||||||
|
desc: Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
|
||||||
|
meta:
|
||||||
|
title: Meta
|
||||||
|
desc: Helper functions to debug `sqlite-vec` installations.
|
||||||
|
entrypoints:
|
||||||
|
title: Entrypoints
|
||||||
|
desc: All the named entrypoints that load in different `sqlite-vec` functions and options.
|
||||||
|
# vec0:
|
||||||
|
# title: "vec0 Virtual Table"
|
||||||
|
# desc: TODO
|
||||||
|
meta:
|
||||||
vec_version:
|
vec_version:
|
||||||
params: []
|
params: []
|
||||||
section: meta
|
|
||||||
desc: Returns a version string of the current `sqlite-vec` installation.
|
desc: Returns a version string of the current `sqlite-vec` installation.
|
||||||
example: select vec_version();
|
example: select vec_version();
|
||||||
vec_debug:
|
vec_debug:
|
||||||
params: []
|
params: []
|
||||||
section: meta
|
|
||||||
desc: Returns debugging information of the current `sqlite-vec` installation.
|
desc: Returns debugging information of the current `sqlite-vec` installation.
|
||||||
example: select vec_debug();
|
example: select vec_debug();
|
||||||
|
constructors:
|
||||||
vec_f32:
|
vec_f32:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: constructors
|
|
||||||
desc: |
|
desc: |
|
||||||
Creates a float vector from a BLOB or JSON text. If a BLOB is provided,
|
Creates a float vector from a BLOB or JSON text. If a BLOB is provided,
|
||||||
the length must be divisible by 4, as a float takes up 4 bytes of space each.
|
the length must be divisible by 4, as a float takes up 4 bytes of space each.
|
||||||
|
|
@ -42,7 +54,6 @@ functions:
|
||||||
- select vec_f32(X'AA');
|
- select vec_f32(X'AA');
|
||||||
vec_int8:
|
vec_int8:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: constructors
|
|
||||||
desc: |
|
desc: |
|
||||||
Creates a 8-bit integer vector from a BLOB or JSON text. If a BLOB is provided,
|
Creates a 8-bit integer vector from a BLOB or JSON text. If a BLOB is provided,
|
||||||
the length must be divisible by 4, as a float takes up 4 bytes of space each.
|
each byte is read as one signed 8-bit integer element.
|
||||||
|
|
@ -60,19 +71,18 @@ functions:
|
||||||
|
|
||||||
vec_bit:
|
vec_bit:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: constructors
|
|
||||||
desc: |
|
desc: |
|
||||||
Creates a binary vector from a BLOB.
|
Creates a binary vector from a BLOB.
|
||||||
|
|
||||||
The returned value is a BLOB with 4 bytes per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
|
The returned value is a BLOB with 1 byte per 8 elements, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
|
||||||
of `224`.
|
of `224`.
|
||||||
example:
|
example:
|
||||||
- select vec_bit(X'F0');
|
- select vec_bit(X'F0');
|
||||||
- select subtype(vec_bit(X'F0'));
|
- select subtype(vec_bit(X'F0'));
|
||||||
- select vec_to_json(vec_bit(X'F0'));
|
- select vec_to_json(vec_bit(X'F0'));
|
||||||
|
op:
|
||||||
vec_length:
|
vec_length:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: op
|
|
||||||
desc: |
|
desc: |
|
||||||
Returns the number of elements in the given vector.
|
Returns the number of elements in the given vector.
|
||||||
The vector can be `JSON`, `BLOB`, or the result of a [constructor function](#constructors).
|
The vector can be `JSON`, `BLOB`, or the result of a [constructor function](#constructors).
|
||||||
|
|
@ -84,9 +94,20 @@ functions:
|
||||||
- select vec_length(vec_int8(X'AABBCCDD'));
|
- select vec_length(vec_int8(X'AABBCCDD'));
|
||||||
- select vec_length(vec_bit(X'AABBCCDD'));
|
- select vec_length(vec_bit(X'AABBCCDD'));
|
||||||
- select vec_length(X'CCDD');
|
- select vec_length(X'CCDD');
|
||||||
|
vec_type:
|
||||||
|
params: [vector]
|
||||||
|
desc: |
|
||||||
|
Returns the name of the type of `vector` as text. One of `'float32'`, `'int8'`, or `'bit'`.
|
||||||
|
|
||||||
|
This function will return an error if `vector` is invalid.
|
||||||
|
example:
|
||||||
|
- select vec_type('[.1, .2]');
|
||||||
|
- select vec_type(X'AABBCCDD');
|
||||||
|
- select vec_type(vec_int8(X'AABBCCDD'));
|
||||||
|
- select vec_type(vec_bit(X'AABBCCDD'));
|
||||||
|
- select vec_type(X'CCDD');
|
||||||
vec_add:
|
vec_add:
|
||||||
params: [a, b]
|
params: [a, b]
|
||||||
section: op
|
|
||||||
desc: |
|
desc: |
|
||||||
Adds every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
|
Adds every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
|
||||||
must be of the same type and same length. Only `float32` and `int8` vectors are supported.
|
must be of the same type and same length. Only `float32` and `int8` vectors are supported.
|
||||||
|
|
@ -119,7 +140,6 @@ functions:
|
||||||
- select vec_add(vec_bit(X'AA'), vec_bit(X'BB'));
|
- select vec_add(vec_bit(X'AA'), vec_bit(X'BB'));
|
||||||
vec_sub:
|
vec_sub:
|
||||||
params: [a, b]
|
params: [a, b]
|
||||||
section: op
|
|
||||||
desc: |
|
desc: |
|
||||||
Subtracts every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
|
Subtracts every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
|
||||||
must be of the same type and same length. Only `float32` and `int8` vectors are supported.
|
must be of the same type and same length. Only `float32` and `int8` vectors are supported.
|
||||||
|
|
@ -152,7 +172,6 @@ functions:
|
||||||
- select vec_sub(vec_bit(X'AA'), vec_bit(X'BB'));
|
- select vec_sub(vec_bit(X'AA'), vec_bit(X'BB'));
|
||||||
vec_normalize:
|
vec_normalize:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: op
|
|
||||||
desc: |
|
desc: |
|
||||||
Performs L2 normalization on the given vector. Only float32 vectors are currently supported.
|
Performs L2 normalization on the given vector. Only float32 vectors are currently supported.
|
||||||
|
|
||||||
|
|
@ -172,7 +191,6 @@ functions:
|
||||||
);
|
);
|
||||||
vec_slice:
|
vec_slice:
|
||||||
params: [vector, start, end]
|
params: [vector, start, end]
|
||||||
section: op
|
|
||||||
desc: |
|
desc: |
|
||||||
Extract a subset of `vector` from the `start` element (inclusive) to the `end` element (exclusive). TODO check
|
Extract a subset of `vector` from the `start` element (inclusive) to the `end` element (exclusive). TODO check
|
||||||
|
|
||||||
|
|
@ -208,7 +226,6 @@ functions:
|
||||||
);
|
);
|
||||||
vec_to_json:
|
vec_to_json:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: op
|
|
||||||
desc: |
|
desc: |
|
||||||
Represents a vector as JSON text. The input vector can be a vector BLOB or JSON text.
|
Represents a vector as JSON text. The input vector can be a vector BLOB or JSON text.
|
||||||
|
|
||||||
|
|
@ -219,10 +236,45 @@ functions:
|
||||||
- select vec_to_json(vec_bit(X'AABBCCDD'));
|
- select vec_to_json(vec_bit(X'AABBCCDD'));
|
||||||
- select vec_to_json('[1,2,3,4]');
|
- select vec_to_json('[1,2,3,4]');
|
||||||
- select vec_to_json('invalid');
|
- select vec_to_json('invalid');
|
||||||
|
vec_each:
|
||||||
|
params: [vector]
|
||||||
|
desc: |
|
||||||
|
A table function to iterate through every element in a vector. One row is returned per element in the vector.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE vec_each(
|
||||||
|
rowid int, -- The
|
||||||
|
vector HIDDEN -- input parameter: A well-formed vector value
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns an error if `vector` is not a valid vector.
|
||||||
|
example:
|
||||||
|
- select rowid, value from vec_each('[1,2,3,4]');
|
||||||
|
- select rowid, value from vec_each(X'AABBCCDD00112233');
|
||||||
|
- select rowid, value from vec_each(vec_int8(X'AABBCCDD'));
|
||||||
|
- select rowid, value from vec_each(vec_bit(X'F0'));
|
||||||
|
|
||||||
|
distance:
|
||||||
|
vec_distance_L2:
|
||||||
|
params: [a, b]
|
||||||
|
desc: |
|
||||||
|
Calculates the L2 Euclidean distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
|
||||||
|
|
||||||
|
Returns an error under the following conditions:
|
||||||
|
- `a` or `b` are invalid vectors
|
||||||
|
- `a` and `b` do not share the same vector element type (e.g. one is float32 and the other is int8)
|
||||||
|
- `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
|
||||||
|
- `a` or `b` do not have the same length.
|
||||||
|
example:
|
||||||
|
- select vec_distance_L2('[1, 1]', '[2, 2]');
|
||||||
|
- select vec_distance_L2('[1, 1]', '[-2, -2]');
|
||||||
|
- select vec_distance_L2('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
|
||||||
|
- select vec_distance_L2(X'AABBCCDD', X'00112233');
|
||||||
|
- select vec_distance_L2('[1, 1]', vec_int8('[2, 2]'));
|
||||||
|
- select vec_distance_L2(vec_bit(X'AA'), vec_bit(X'BB'));
|
||||||
vec_distance_cosine:
|
vec_distance_cosine:
|
||||||
params: [a, b]
|
params: [a, b]
|
||||||
section: distance
|
|
||||||
desc: |
|
desc: |
|
||||||
Calculates the cosine distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
|
Calculates the cosine distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
|
||||||
|
|
||||||
|
|
@ -236,9 +288,10 @@ functions:
|
||||||
- select vec_distance_cosine('[1, 1]', '[-2, -2]');
|
- select vec_distance_cosine('[1, 1]', '[-2, -2]');
|
||||||
- select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
|
- select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
|
||||||
- select vec_distance_cosine(X'AABBCCDD', X'00112233');
|
- select vec_distance_cosine(X'AABBCCDD', X'00112233');
|
||||||
|
- select vec_distance_cosine('[1, 1]', vec_int8('[2, 2]'));
|
||||||
|
- select vec_distance_cosine(vec_bit(X'AA'), vec_bit(X'BB'));
|
||||||
vec_distance_hamming:
|
vec_distance_hamming:
|
||||||
params: [a, b]
|
params: [a, b]
|
||||||
section: distance
|
|
||||||
desc: |
|
desc: |
|
||||||
Calculates the hamming distance between two bitvectors `a` and `b`. Only valid for bitvectors.
|
Calculates the hamming distance between two bitvectors `a` and `b`. Only valid for bitvectors.
|
||||||
|
|
||||||
|
|
@ -250,34 +303,85 @@ functions:
|
||||||
- select vec_distance_hamming(vec_bit(X'00'), vec_bit(X'FF'));
|
- select vec_distance_hamming(vec_bit(X'00'), vec_bit(X'FF'));
|
||||||
- select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
|
- select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
|
||||||
- select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
|
- select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
|
||||||
- select vec_distance_hamming(X'F0', X'00');
|
- select vec_distance_hamming('[1, 1]', '[0, 0]');
|
||||||
vec_distance_l2:
|
|
||||||
params: [a, b]
|
quantization:
|
||||||
section: distance
|
vec_quantize_binary:
|
||||||
|
params: [vector]
|
||||||
|
desc: |
|
||||||
|
Quantize a float32 or int8 vector into a bitvector.
|
||||||
|
For every element in the vector, a `1` is assigned to positive numbers and a `0` is assigned to negative numbers.
|
||||||
|
These values are then packed into a bit vector.
|
||||||
|
|
||||||
|
Returns an error if `vector` is invalid, or if `vector` is not a float32 or int8 vector.
|
||||||
|
example:
|
||||||
|
- select vec_quantize_binary('[1, 2, 3, 4, 5, 6, 7, 8]');
|
||||||
|
- select vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
|
||||||
|
- select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
|
||||||
|
- select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
|
||||||
|
- select vec_quantize_binary(vec_int8(X'11223344'));
|
||||||
|
- select vec_quantize_binary(vec_bit(X'FF'));
|
||||||
|
vec_quantize_i8:
|
||||||
|
params: [vector, "[start]", "[end]"]
|
||||||
desc: x
|
desc: x
|
||||||
example: select 'todo';
|
example: select 'todo';
|
||||||
|
|
||||||
vec_quantize_binary:
|
numpy:
|
||||||
|
vec_npy_each:
|
||||||
params: [vector]
|
params: [vector]
|
||||||
section: quantization
|
desc: |
|
||||||
desc: x
|
xxx
|
||||||
example: select 'todo';
|
example:
|
||||||
vec_quantize_i8:
|
- |
|
||||||
params: [vector, "[start]", "[end]"]
|
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
||||||
section: quantization
|
select
|
||||||
desc: x
|
rowid,
|
||||||
example: select 'todo';
|
vector,
|
||||||
|
vec_type(vector),
|
||||||
|
vec_to_json(vector)
|
||||||
|
from vec_npy_each(
|
||||||
|
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
||||||
|
)
|
||||||
|
- |
|
||||||
|
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
vector,
|
||||||
|
vec_type(vector),
|
||||||
|
vec_to_json(vector)
|
||||||
|
from vec_npy_each(
|
||||||
|
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
||||||
|
)
|
||||||
|
|
||||||
|
vec0:
|
||||||
|
vec0:
|
||||||
|
params: []
|
||||||
|
desc: TODO
|
||||||
|
example:
|
||||||
|
- |
|
||||||
|
create virtual table vec_items using vec0(
|
||||||
|
contents_embedding float[4]
|
||||||
|
);
|
||||||
|
- |
|
||||||
|
insert into vec_items(rowid, contents_embedding)
|
||||||
|
values (1, '[1, 1, 1, 1]'),
|
||||||
|
(2, '[2, 2, 2, 2]'),
|
||||||
|
(3, '[3, 3, 3, 3]');
|
||||||
|
|
||||||
|
entrypoints:
|
||||||
|
{}
|
||||||
|
#sqlite3_vec_init:
|
||||||
|
# desc: |
|
||||||
|
# asdf
|
||||||
|
#sqlite3_vec_fs_read_init:
|
||||||
|
# desc: |
|
||||||
|
# asdf
|
||||||
#table_functions:
|
#table_functions:
|
||||||
# vec_each:
|
# vec_each:
|
||||||
# columns: [rowid, value]
|
# columns: [rowid, value]
|
||||||
# inputs: ["vector"]
|
# inputs: ["vector"]
|
||||||
# desc:
|
# desc:
|
||||||
# example:
|
# example:
|
||||||
# vec_npy_each:
|
|
||||||
# columns: [rowid, vector]
|
|
||||||
# inputs: ["input"]
|
|
||||||
# desc:
|
|
||||||
# example:
|
|
||||||
#virtual_tables:
|
#virtual_tables:
|
||||||
# vec0:
|
# vec0:
|
||||||
# desc:
|
# desc:
|
||||||
|
|
|
||||||
|
|
@ -1,45 +1,23 @@
|
||||||
|
---
|
||||||
|
outline: 2
|
||||||
|
---
|
||||||
|
|
||||||
# API Reference
|
# API Reference
|
||||||
|
|
||||||
|
A complete reference to all the SQL scalar functions, table functions, and virtual tables inside `sqlite-vec`.
|
||||||
|
|
||||||
::: warning
|
::: warning
|
||||||
sqlite-vec is pre-v1, so expect breaking changes.
|
sqlite-vec is pre-v1, so expect breaking changes.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
[[toc]]
|
[[toc]]
|
||||||
|
|
||||||
## Meta {#meta}
|
|
||||||
|
|
||||||
TODO
|
|
||||||
|
|
||||||
### `vec_version()` {#vec_version}
|
|
||||||
|
|
||||||
Returns a version string of the current `sqlite-vec` installation.
|
|
||||||
|
|
||||||
```sql
|
|
||||||
select vec_version();
|
|
||||||
-- 'v0.0.1-alpha.36'
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
### `vec_debug()` {#vec_debug}
|
|
||||||
|
|
||||||
Returns debugging information of the current `sqlite-vec` installation.
|
|
||||||
|
|
||||||
```sql
|
|
||||||
select vec_debug();
|
|
||||||
/*
|
|
||||||
'Version: v0.0.1-alpha.36
|
|
||||||
Date: 2024-07-16T23:06:41Z-0700
|
|
||||||
Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d
|
|
||||||
Build flags: avx '
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## Constructors {#constructors}
|
## Constructors {#constructors}
|
||||||
|
|
||||||
TODO
|
SQL functions that "construct" vectors with different element types.
|
||||||
|
|
||||||
|
Currently, only `float32`, `int8`, and `bit` vectors are supported.
|
||||||
|
|
||||||
|
|
||||||
### `vec_f32(vector)` {#vec_f32}
|
### `vec_f32(vector)` {#vec_f32}
|
||||||
|
|
||||||
|
|
@ -52,7 +30,7 @@ of `223`.
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
select vec_f32('[.1, .2, .3, 4]');
|
select vec_f32('[.1, .2, .3, 4]');
|
||||||
-- X'CDCCCC3DCDCC4C3E9A99993E008040'
|
-- X'CDCCCC3DCDCC4C3E9A99993E00008040'
|
||||||
|
|
||||||
select subtype(vec_f32('[.1, .2, .3, 4]'));
|
select subtype(vec_f32('[.1, .2, .3, 4]'));
|
||||||
-- 223
|
-- 223
|
||||||
|
|
@ -81,7 +59,7 @@ of `225`.
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
select vec_int8('[1, 2, 3, 4]');
|
select vec_int8('[1, 2, 3, 4]');
|
||||||
-- X'1234'
|
-- X'01020304'
|
||||||
|
|
||||||
select subtype(vec_int8('[1, 2, 3, 4]'));
|
select subtype(vec_int8('[1, 2, 3, 4]'));
|
||||||
-- 225
|
-- 225
|
||||||
|
|
@ -102,7 +80,7 @@ select vec_int8('[999]');
|
||||||
|
|
||||||
Creates a binary vector from a BLOB.
|
Creates a binary vector from a BLOB.
|
||||||
|
|
||||||
The returned value is a BLOB with 4 bytes per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
|
The returned value is a BLOB with 1 byte per 8 elements, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
|
||||||
of `224`.
|
of `224`.
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -121,7 +99,8 @@ select vec_to_json(vec_bit(X'F0'));
|
||||||
|
|
||||||
## Operations {#op}
|
## Operations {#op}
|
||||||
|
|
||||||
TODO
|
Different operations and utilities for working with vectors.
|
||||||
|
|
||||||
|
|
||||||
### `vec_length(vector)` {#vec_length}
|
### `vec_length(vector)` {#vec_length}
|
||||||
|
|
||||||
|
|
@ -148,6 +127,32 @@ select vec_length(X'CCDD');
|
||||||
-- ❌ invalid float32 vector BLOB length. Must be divisible by 4, found 2
|
-- ❌ invalid float32 vector BLOB length. Must be divisible by 4, found 2
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### `vec_type(vector)` {#vec_type}
|
||||||
|
|
||||||
|
Returns the name of the type of `vector` as text. One of `'float32'`, `'int8'`, or `'bit'`.
|
||||||
|
|
||||||
|
This function will return an error if `vector` is invalid.
|
||||||
|
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select vec_type('[.1, .2]');
|
||||||
|
-- 'float32'
|
||||||
|
|
||||||
|
select vec_type(X'AABBCCDD');
|
||||||
|
-- 'float32'
|
||||||
|
|
||||||
|
select vec_type(vec_int8(X'AABBCCDD'));
|
||||||
|
-- 'int8'
|
||||||
|
|
||||||
|
select vec_type(vec_bit(X'AABBCCDD'));
|
||||||
|
-- 'bit'
|
||||||
|
|
||||||
|
select vec_type(X'CCDD');
|
||||||
|
-- ❌ invalid float32 vector BLOB length. Must be divisible by 4, found 2
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### `vec_add(a, b)` {#vec_add}
|
### `vec_add(a, b)` {#vec_add}
|
||||||
|
|
@ -165,7 +170,7 @@ select vec_add(
|
||||||
'[.1, .2, .3]',
|
'[.1, .2, .3]',
|
||||||
'[.4, .5, .6]'
|
'[.4, .5, .6]'
|
||||||
);
|
);
|
||||||
-- X'0003F3333333F6766663F'
|
-- X'0000003F3333333F6766663F'
|
||||||
|
|
||||||
select vec_to_json(
|
select vec_to_json(
|
||||||
vec_add(
|
vec_add(
|
||||||
|
|
@ -243,7 +248,7 @@ Returns an error if the input is an invalid vector or not a float32 vector.
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
select vec_normalize('[2, 3, 1, -4]');
|
select vec_normalize('[2, 3, 1, -4]');
|
||||||
-- X'BAF4BA3E8B37C3FBAF43A3EBAF43ABF'
|
-- X'BAF4BA3E8B370C3FBAF43A3EBAF43ABF'
|
||||||
|
|
||||||
select vec_to_json(
|
select vec_to_json(
|
||||||
vec_normalize('[2, 3, 1, -4]')
|
vec_normalize('[2, 3, 1, -4]')
|
||||||
|
|
@ -277,7 +282,7 @@ Returns an error in the following conditions:
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
select vec_slice('[1, 2,3, 4]', 0, 2);
|
select vec_slice('[1, 2,3, 4]', 0, 2);
|
||||||
-- X'00803F00040'
|
-- X'0000803F00000040'
|
||||||
|
|
||||||
select vec_to_json(
|
select vec_to_json(
|
||||||
vec_slice('[1, 2,3, 4]', 0, 2)
|
vec_slice('[1, 2,3, 4]', 0, 2)
|
||||||
|
|
@ -331,11 +336,134 @@ select vec_to_json('invalid');
|
||||||
-- ❌ JSON array parsing error: Input does not start with '['
|
-- ❌ JSON array parsing error: Input does not start with '['
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### `vec_each(vector)` {#vec_each}
|
||||||
|
|
||||||
|
A table function to iterate through every element in a vector. One row is returned per element in the vector.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE vec_each(
|
||||||
|
rowid int, -- The
|
||||||
|
vector HIDDEN -- input parameter: A well-formed vector value
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns an error if `vector` is not a valid vector.
|
||||||
|
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select rowid, value from vec_each('[1,2,3,4]');
|
||||||
|
/*
|
||||||
|
┌───────┬───────┐
|
||||||
|
│ rowid │ value │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 0 │ 1 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 1 │ 2 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 2 │ 3 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 3 │ 4 │
|
||||||
|
└───────┴───────┘
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
select rowid, value from vec_each(X'AABBCCDD00112233');
|
||||||
|
/*
|
||||||
|
┌───────┬──────────────────────┐
|
||||||
|
│ rowid │ value │
|
||||||
|
├───────┼──────────────────────┤
|
||||||
|
│ 0 │ -1844071490169864200 │
|
||||||
|
├───────┼──────────────────────┤
|
||||||
|
│ 1 │ 3.773402568185702e-8 │
|
||||||
|
└───────┴──────────────────────┘
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
select rowid, value from vec_each(vec_int8(X'AABBCCDD'));
|
||||||
|
/*
|
||||||
|
┌───────┬───────┐
|
||||||
|
│ rowid │ value │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 0 │ -86 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 1 │ -69 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 2 │ -52 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 3 │ -35 │
|
||||||
|
└───────┴───────┘
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
select rowid, value from vec_each(vec_bit(X'F0'));
|
||||||
|
/*
|
||||||
|
┌───────┬───────┐
|
||||||
|
│ rowid │ value │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 0 │ 1 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 1 │ 1 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 2 │ 1 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 3 │ 1 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 4 │ 0 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 5 │ 0 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 6 │ 0 │
|
||||||
|
├───────┼───────┤
|
||||||
|
│ 7 │ 0 │
|
||||||
|
└───────┴───────┘
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Distance functions {#distance}
|
## Distance functions {#distance}
|
||||||
|
|
||||||
TODO
|
Various algorithms to calculate distance between two vectors.
|
||||||
|
|
||||||
|
### `vec_distance_L2(a, b)` {#vec_distance_L2}
|
||||||
|
|
||||||
|
Calculates the L2 Euclidean distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.
|
||||||
|
|
||||||
|
Returns an error under the following conditions:
|
||||||
|
- `a` or `b` are invalid vectors
|
||||||
|
- `a` and `b` do not share the same vector element type (e.g. one is float32 and the other is int8)
|
||||||
|
- `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
|
||||||
|
- `a` or `b` do not have the same length.
|
||||||
|
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select vec_distance_L2('[1, 1]', '[2, 2]');
|
||||||
|
-- 1.4142135381698608
|
||||||
|
|
||||||
|
select vec_distance_L2('[1, 1]', '[-2, -2]');
|
||||||
|
-- 4.242640495300293
|
||||||
|
|
||||||
|
select vec_distance_L2('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
|
||||||
|
-- 5.7157673835754395
|
||||||
|
|
||||||
|
select vec_distance_L2(X'AABBCCDD', X'00112233');
|
||||||
|
-- 1844071490169864200
|
||||||
|
|
||||||
|
select vec_distance_L2('[1, 1]', vec_int8('[2, 2]'));
|
||||||
|
-- ❌ Vector type mistmatch. First vector has type float32, while the second has type int8.
|
||||||
|
|
||||||
|
select vec_distance_L2(vec_bit(X'AA'), vec_bit(X'BB'));
|
||||||
|
-- ❌ Cannot calculate L2 distance between two bitvectors.
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
### `vec_distance_cosine(a, b)` {#vec_distance_cosine}
|
### `vec_distance_cosine(a, b)` {#vec_distance_cosine}
|
||||||
|
|
||||||
|
|
@ -361,6 +489,12 @@ select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
|
||||||
select vec_distance_cosine(X'AABBCCDD', X'00112233');
|
select vec_distance_cosine(X'AABBCCDD', X'00112233');
|
||||||
-- 2
|
-- 2
|
||||||
|
|
||||||
|
select vec_distance_cosine('[1, 1]', vec_int8('[2, 2]'));
|
||||||
|
-- ❌ Vector type mistmatch. First vector has type float32, while the second has type int8.
|
||||||
|
|
||||||
|
select vec_distance_cosine(vec_bit(X'AA'), vec_bit(X'BB'));
|
||||||
|
-- ❌ Cannot calculate cosine distance between two bitvectors.
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -384,34 +518,43 @@ select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
|
||||||
select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
|
select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
|
||||||
-- 4
|
-- 4
|
||||||
|
|
||||||
select vec_distance_hamming(X'F0', X'00');
|
select vec_distance_hamming('[1, 1]', '[0, 0]');
|
||||||
-- ❌ Error reading 1st vector: invalid float32 vector BLOB length. Must be divisible by 4, found 1
|
-- ❌ Cannot calculate hamming distance between two float32 vectors.
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
### `vec_distance_l2(a, b)` {#vec_distance_l2}
|
|
||||||
|
|
||||||
x
|
|
||||||
|
|
||||||
```sql
|
|
||||||
select 'todo';
|
|
||||||
-- 'todo'
|
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Quantization {#quantization}
|
## Quantization {#quantization}
|
||||||
|
|
||||||
TODO
|
Various techniques to "compress" a vector by reducing precision and accuracy.
|
||||||
|
|
||||||
### `vec_quantize_binary(vector)` {#vec_quantize_binary}
|
### `vec_quantize_binary(vector)` {#vec_quantize_binary}
|
||||||
|
|
||||||
x
|
Quantize a float32 or int8 vector into a bitvector.
|
||||||
|
For every element in the vector, a `1` is assigned to positive numbers and a `0` is assigned to negative numbers.
|
||||||
|
These values are then packed into a bit vector.
|
||||||
|
|
||||||
|
Returns an error if `vector` is invalid, or if `vector` is not a float32 or int8 vector.
|
||||||
|
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
select 'todo';
|
select vec_quantize_binary('[1, 2, 3, 4, 5, 6, 7, 8]');
|
||||||
-- 'todo'
|
-- X'FF'
|
||||||
|
|
||||||
|
select vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
|
||||||
|
-- X'0F'
|
||||||
|
|
||||||
|
select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
|
||||||
|
-- X'00'
|
||||||
|
|
||||||
|
select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
|
||||||
|
-- X'00'
|
||||||
|
|
||||||
|
select vec_quantize_binary(vec_int8(X'11223344'));
|
||||||
|
-- ❌ Binary quantization requires vectors with a length divisible by 8
|
||||||
|
|
||||||
|
select vec_quantize_binary(vec_bit(X'FF'));
|
||||||
|
-- ❌ Can only binary quantize float or int8 vectors
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
@ -427,3 +570,97 @@ select 'todo';
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## NumPy Utilities {#numpy}
|
||||||
|
|
||||||
|
Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
|
||||||
|
|
||||||
|
### `vec_npy_each(vector)` {#vec_npy_each}
|
||||||
|
|
||||||
|
xxx
|
||||||
|
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
vector,
|
||||||
|
vec_type(vector),
|
||||||
|
vec_to_json(vector)
|
||||||
|
from vec_npy_each(
|
||||||
|
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
||||||
|
)
|
||||||
|
/*
|
||||||
|
┌───────┬─────────────┬──────────────────┬─────────────────────┐
|
||||||
|
│ rowid │ vector │ vec_type(vector) │ vec_to_json(vector) │
|
||||||
|
├───────┼─────────────┼──────────────────┼─────────────────────┤
|
||||||
|
│ 0 │ X'0000803F' │ 'float32' │ '[1.000000]' │
|
||||||
|
├───────┼─────────────┼──────────────────┼─────────────────────┤
|
||||||
|
│ 1 │ X'00000040' │ 'float32' │ '[2.000000]' │
|
||||||
|
├───────┼─────────────┼──────────────────┼─────────────────────┤
|
||||||
|
│ 2 │ X'00004040' │ 'float32' │ '[3.000000]' │
|
||||||
|
└───────┴─────────────┴──────────────────┴─────────────────────┘
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
-- db.execute('select quote(?)', [to_npy(np.array([[1.0], [2.0], [3.0]], dtype=np.float32))]).fetchone()
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
vector,
|
||||||
|
vec_type(vector),
|
||||||
|
vec_to_json(vector)
|
||||||
|
from vec_npy_each(
|
||||||
|
X'934E554D5059010076007B276465736372273A20273C6634272C2027666F727472616E5F6F72646572273A2046616C73652C20277368617065273A2028332C2031292C207D202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020200A0000803F0000004000004040'
|
||||||
|
)
|
||||||
|
/*
|
||||||
|
┌───────┬─────────────┬──────────────────┬─────────────────────┐
|
||||||
|
│ rowid │ vector │ vec_type(vector) │ vec_to_json(vector) │
|
||||||
|
├───────┼─────────────┼──────────────────┼─────────────────────┤
|
||||||
|
│ 0 │ X'0000803F' │ 'float32' │ '[1.000000]' │
|
||||||
|
├───────┼─────────────┼──────────────────┼─────────────────────┤
|
||||||
|
│ 1 │ X'00000040' │ 'float32' │ '[2.000000]' │
|
||||||
|
├───────┼─────────────┼──────────────────┼─────────────────────┤
|
||||||
|
│ 2 │ X'00004040' │ 'float32' │ '[3.000000]' │
|
||||||
|
└───────┴─────────────┴──────────────────┴─────────────────────┘
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Meta {#meta}
|
||||||
|
|
||||||
|
Helper functions to debug `sqlite-vec` installations.
|
||||||
|
|
||||||
|
### `vec_version()` {#vec_version}
|
||||||
|
|
||||||
|
Returns a version string of the current `sqlite-vec` installation.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select vec_version();
|
||||||
|
-- 'v0.0.1-alpha.36'
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### `vec_debug()` {#vec_debug}
|
||||||
|
|
||||||
|
Returns debugging information of the current `sqlite-vec` installation.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select vec_debug();
|
||||||
|
/*
|
||||||
|
'Version: v0.0.1-alpha.36
|
||||||
|
Date: 2024-07-16T23:06:41Z-0700
|
||||||
|
Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d
|
||||||
|
Build flags: avx '
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Entrypoints {#entrypoints}
|
||||||
|
|
||||||
|
All the named entrypoints that load in different `sqlite-vec` functions and options.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,22 @@ import { readFileSync, writeFileSync } from "node:fs";
|
||||||
import * as v from "valibot";
|
import * as v from "valibot";
|
||||||
import { table } from "table";
|
import { table } from "table";
|
||||||
|
|
||||||
|
const HEADER = `---
|
||||||
|
outline: 2
|
||||||
|
---
|
||||||
|
|
||||||
|
# API Reference
|
||||||
|
|
||||||
|
A complete reference to all the SQL scalar functions, table functions, and virtual tables inside \`sqlite-vec\`.
|
||||||
|
|
||||||
|
::: warning
|
||||||
|
sqlite-vec is pre-v1, so expect breaking changes.
|
||||||
|
:::
|
||||||
|
|
||||||
|
[[toc]]
|
||||||
|
|
||||||
|
`;
|
||||||
|
|
||||||
const REF_PATH = resolve(
|
const REF_PATH = resolve(
|
||||||
dirname(fileURLToPath(import.meta.url)),
|
dirname(fileURLToPath(import.meta.url)),
|
||||||
"../reference.yaml"
|
"../reference.yaml"
|
||||||
|
|
@ -15,32 +31,25 @@ const EXT_PATH = resolve(
|
||||||
"../dist/vec0"
|
"../dist/vec0"
|
||||||
);
|
);
|
||||||
|
|
||||||
const DocSchema = v.object({
|
const DocSchema = v.objectWithRest(
|
||||||
sections: v.record(
|
{
|
||||||
v.string(),
|
sections: v.record(
|
||||||
v.object({
|
v.string(),
|
||||||
title: v.string(),
|
v.object({
|
||||||
desc: v.string(),
|
title: v.string(),
|
||||||
})
|
desc: v.string(),
|
||||||
),
|
})
|
||||||
functions: v.record(
|
),
|
||||||
v.string(),
|
},
|
||||||
v.object({
|
v.record(
|
||||||
params: v.array(v.string()),
|
|
||||||
desc: v.string(),
|
|
||||||
section: v.string(),
|
|
||||||
example: v.union([v.string(), v.array(v.string())]),
|
|
||||||
})
|
|
||||||
),
|
|
||||||
/*table_functions: v.record(
|
|
||||||
v.string(),
|
v.string(),
|
||||||
v.object({
|
v.object({
|
||||||
params: v.array(v.string()),
|
params: v.array(v.string()),
|
||||||
desc: v.string(),
|
desc: v.string(),
|
||||||
example: v.union([v.string(), v.array(v.string())]),
|
example: v.union([v.string(), v.array(v.string())]),
|
||||||
})
|
})
|
||||||
),*/
|
)
|
||||||
});
|
);
|
||||||
|
|
||||||
const tableConfig = {
|
const tableConfig = {
|
||||||
border: {
|
border: {
|
||||||
|
|
@ -78,7 +87,7 @@ function formatSingleValue(value) {
|
||||||
if (value instanceof Uint8Array) {
|
if (value instanceof Uint8Array) {
|
||||||
let s = "X'";
|
let s = "X'";
|
||||||
for (const v of value) {
|
for (const v of value) {
|
||||||
s += v.toString(16).toUpperCase();
|
s += v.toString(16).toUpperCase().padStart(2, "0");
|
||||||
}
|
}
|
||||||
s += "'";
|
s += "'";
|
||||||
return `-- ${s}`;
|
return `-- ${s}`;
|
||||||
|
|
@ -87,12 +96,13 @@ function formatSingleValue(value) {
|
||||||
return "-- " + JSON.stringify(value, null, 2);
|
return "-- " + JSON.stringify(value, null, 2);
|
||||||
}
|
}
|
||||||
function formatValue(value) {
|
function formatValue(value) {
|
||||||
if (typeof value === "string" || typeof value === "number") return value;
|
if (typeof value === "string") return `'${value}'`;
|
||||||
|
if (typeof value === "number") return value;
|
||||||
if (value === null) return "NULL";
|
if (value === null) return "NULL";
|
||||||
if (value instanceof Uint8Array) {
|
if (value instanceof Uint8Array) {
|
||||||
let s = "X'";
|
let s = "X'";
|
||||||
for (const v of value) {
|
for (const v of value) {
|
||||||
s += v.toString(16);
|
s += v.toString(16).toUpperCase().padStart(2, "0");
|
||||||
}
|
}
|
||||||
s += "'";
|
s += "'";
|
||||||
return s;
|
return s;
|
||||||
|
|
@ -125,7 +135,11 @@ function renderExamples(db, name, example) {
|
||||||
results = null;
|
results = null;
|
||||||
try {
|
try {
|
||||||
stmt = db.prepare(sql);
|
stmt = db.prepare(sql);
|
||||||
stmt.raw(true);
|
try {
|
||||||
|
stmt.raw(true);
|
||||||
|
} catch (err) {
|
||||||
|
1;
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error preparing statement for ${name}:`);
|
console.error(`Error preparing statement for ${name}:`);
|
||||||
console.error(error);
|
console.error(error);
|
||||||
|
|
@ -157,37 +171,27 @@ function renderExamples(db, name, example) {
|
||||||
return md;
|
return md;
|
||||||
}
|
}
|
||||||
|
|
||||||
let md = `# API Reference
|
let md = HEADER;
|
||||||
|
|
||||||
::: warning
|
|
||||||
sqlite-vec is pre-v1, so expect breaking changes.
|
|
||||||
:::
|
|
||||||
|
|
||||||
[[toc]]
|
|
||||||
|
|
||||||
`;
|
|
||||||
const doc = v.parse(DocSchema, load(readFileSync(REF_PATH, "utf8")));
|
const doc = v.parse(DocSchema, load(readFileSync(REF_PATH, "utf8")));
|
||||||
|
|
||||||
const db = new Database();
|
const db = new Database();
|
||||||
db.loadExtension(EXT_PATH);
|
db.loadExtension(EXT_PATH);
|
||||||
|
|
||||||
let lastSection = null;
|
for (const section in doc.sections) {
|
||||||
for (const [name, { params, desc, example, section }] of Object.entries(
|
md += `## ${doc.sections[section].title} {#${section}} \n\n`;
|
||||||
doc.functions
|
md += doc.sections[section].desc;
|
||||||
)) {
|
md += "\n\n";
|
||||||
const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`;
|
|
||||||
|
|
||||||
if (lastSection != section) {
|
for (const [name, { params, desc, example }] of Object.entries(
|
||||||
md += `## ${doc.sections[section].title} {#${section}} \n\n`;
|
doc[section]
|
||||||
md += doc.sections[section].desc;
|
)) {
|
||||||
md += "\n\n";
|
const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`;
|
||||||
lastSection = section;
|
|
||||||
|
md += "### " + headerText + "\n\n";
|
||||||
|
|
||||||
|
md += desc + "\n\n";
|
||||||
|
md += renderExamples(db, name, example);
|
||||||
}
|
}
|
||||||
|
|
||||||
md += "### " + headerText + "\n\n";
|
|
||||||
|
|
||||||
md += desc + "\n\n";
|
|
||||||
md += renderExamples(db, name, example);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
writeFileSync("api-reference.md", md, "utf8");
|
writeFileSync("api-reference.md", md, "utf8");
|
||||||
|
|
|
||||||
49
site/versioning.md
Normal file
49
site/versioning.md
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Semantic Versioning for `sqlite-vec`
|
||||||
|
|
||||||
|
`sqlite-vec` is pre-v1, so according to the rules of [Semantic Versioning](https://semver.org/),
|
||||||
|
"minor" releases like "0.2.0" or "0.3.0" may contain breaking changes.
|
||||||
|
|
||||||
|
But what exactly counts as a "breaking change" in a SQLite extension? The line isn't so clear, unfortunately.
|
||||||
|
Here are all the surfaces that COULD count as a "breaking change":
|
||||||
|
|
||||||
|
- SQL functions and columns on virtual tables
|
||||||
|
- The C API (extension entrypoints)
|
||||||
|
- "Bindings" like the official `pip` and `npm` packages
|
||||||
|
- Release assets like the pre-compiled extensions
|
||||||
|
|
||||||
|
## What counts as a "breaking change"?
|
||||||
|
|
||||||
|
|
||||||
|
### Changes to SQL functions
|
||||||
|
|
||||||
|
- Re-naming or removing an SQL function
|
||||||
|
- Changing the number of required SQL parameters
|
||||||
|
|
||||||
|
### Changes to SQL virtual tables
|
||||||
|
|
||||||
|
- The number of
|
||||||
|
|
||||||
|
### Changes to the C API
|
||||||
|
|
||||||
|
Currently there is no "official" C API for `sqlite-vec`. However, there are entrypoints defined in C that C developers or developers using FFI can call. Any
|
||||||
|
|
||||||
|
|
||||||
|
### Compile-time options
|
||||||
|
|
||||||
|
The removal of any compile time options
|
||||||
|
|
||||||
|
|
||||||
|
## When is `v1.0` coming?
|
||||||
|
|
||||||
|
In a few months! The main problems I want to solve before `v1.0` include:
|
||||||
|
|
||||||
|
- Metadata columns
|
||||||
|
- Metadata filtering
|
||||||
|
- ANN indexing
|
||||||
|
- Quantization + pre-transformations
|
||||||
|
|
||||||
|
Once those items are complete, I will likely create a `v1.0` release, along with renaming the `vec0` virtual table module to `vec1`. And if future major releases are required, a `v2.0` major release will be made with new `vec2` virtual tables and so on.
|
||||||
|
|
||||||
|
Ideally, only a `v1` major release would be required. But who knows what the future has in store with vector search!
|
||||||
|
|
||||||
|
In general, I will try my best to maximize stability and limit the number of breaking changes for future `sqlite-vec` versions.
|
||||||
199
sqlite-vec.c
199
sqlite-vec.c
|
|
@ -1082,8 +1082,105 @@ finish:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vec_quantize_i8(sqlite3_context *context, int argc,
|
char * vec_type_name(enum VectorElementType elementType) {
|
||||||
|
switch(elementType) {
|
||||||
|
case SQLITE_VEC_ELEMENT_TYPE_FLOAT32:
|
||||||
|
return "float32";
|
||||||
|
case SQLITE_VEC_ELEMENT_TYPE_INT8:
|
||||||
|
return "int8";
|
||||||
|
case SQLITE_VEC_ELEMENT_TYPE_BIT:
|
||||||
|
return "bit";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vec_type(sqlite3_context *context, int argc,
|
||||||
|
sqlite3_value **argv) {
|
||||||
|
assert(argc == 1);
|
||||||
|
void *vector;
|
||||||
|
size_t dimensions;
|
||||||
|
vector_cleanup cleanup;
|
||||||
|
char *pzError;
|
||||||
|
enum VectorElementType elementType;
|
||||||
|
int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
|
||||||
|
&cleanup, &pzError);
|
||||||
|
if (rc != SQLITE_OK) {
|
||||||
|
sqlite3_result_error(context, pzError, -1);
|
||||||
|
sqlite3_free(pzError);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sqlite3_result_text(context, vec_type_name(elementType), -1, SQLITE_STATIC);
|
||||||
|
cleanup(vector);
|
||||||
|
|
||||||
|
}
|
||||||
|
static void vec_quantize_binary(sqlite3_context *context, int argc,
|
||||||
|
sqlite3_value **argv) {
|
||||||
|
assert(argc == 1);
|
||||||
|
void *vector;
|
||||||
|
size_t dimensions;
|
||||||
|
vector_cleanup vectorCleanup;
|
||||||
|
char *pzError;
|
||||||
|
enum VectorElementType elementType;
|
||||||
|
int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
|
||||||
|
&vectorCleanup, &pzError);
|
||||||
|
if (rc != SQLITE_OK) {
|
||||||
|
sqlite3_result_error(context, pzError, -1);
|
||||||
|
sqlite3_free(pzError);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(dimensions <= 0) {
|
||||||
|
sqlite3_result_error(context, "Zero length vectors are not supported.", -1);
|
||||||
|
goto cleanup;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if((dimensions % CHAR_BIT) != 0) {
|
||||||
|
sqlite3_result_error(context, "Binary quantization requires vectors with a length divisible by 8", -1);
|
||||||
|
goto cleanup;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int sz = dimensions / CHAR_BIT;
|
||||||
|
u8 *out = sqlite3_malloc(sz);
|
||||||
|
if (!out) {
|
||||||
|
sqlite3_result_error_code(context, SQLITE_NOMEM);
|
||||||
|
goto cleanup;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
memset(out, 0, sz);
|
||||||
|
|
||||||
|
switch(elementType) {
|
||||||
|
case SQLITE_VEC_ELEMENT_TYPE_FLOAT32: {
|
||||||
|
|
||||||
|
for (size_t i = 0; i < dimensions; i++) {
|
||||||
|
int res = ((f32 *)vector)[i] > 0.0;
|
||||||
|
out[i / 8] |= (res << (i % 8));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SQLITE_VEC_ELEMENT_TYPE_INT8: {
|
||||||
|
for (size_t i = 0; i < dimensions; i++) {
|
||||||
|
int res = ((i8 *)vector)[i] > 0;
|
||||||
|
out[i / 8] |= (res << (i % 8));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SQLITE_VEC_ELEMENT_TYPE_BIT: {
|
||||||
|
sqlite3_result_error(context, "Can only binary quantize float or int8 vectors", -1);
|
||||||
|
sqlite3_free(out);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sqlite3_result_blob(context, out, sz, sqlite3_free);
|
||||||
|
sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
|
||||||
|
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
vectorCleanup(vector);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vec_quantize_int8(sqlite3_context *context, int argc,
|
||||||
sqlite3_value **argv) {
|
sqlite3_value **argv) {
|
||||||
|
assert(argc == 2);
|
||||||
f32 *srcVector;
|
f32 *srcVector;
|
||||||
size_t dimensions;
|
size_t dimensions;
|
||||||
fvec_cleanup srcCleanup;
|
fvec_cleanup srcCleanup;
|
||||||
|
|
@ -1099,39 +1196,23 @@ static void vec_quantize_i8(sqlite3_context *context, int argc,
|
||||||
int sz = dimensions * sizeof(i8);
|
int sz = dimensions * sizeof(i8);
|
||||||
out = sqlite3_malloc(sz);
|
out = sqlite3_malloc(sz);
|
||||||
if (!out) {
|
if (!out) {
|
||||||
rc = SQLITE_NOMEM;
|
sqlite3_result_error_nomem(context);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
memset(out, 0, sz);
|
memset(out, 0, sz);
|
||||||
|
|
||||||
if (argc == 2) {
|
if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
|
||||||
if ((sqlite3_value_type(argv[1]) != SQLITE_TEXT) ||
|
(sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
|
||||||
(sqlite3_value_bytes(argv[1]) != strlen("unit")) ||
|
(sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
|
||||||
(sqlite3_stricmp((const char *)sqlite3_value_text(argv[1]), "unit") !=
|
0)) {
|
||||||
0)) {
|
sqlite3_result_error(context, "2nd argument to vec_quantize_i8() must be 'unit'.", -1);
|
||||||
sqlite3_result_error(context,
|
|
||||||
"2nd argument to vec_quantize_i8() must be 'unit', "
|
|
||||||
"or ranges must be provided.",
|
|
||||||
-1);
|
|
||||||
sqlite3_free(out);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
f32 step = (1.0 - (-1.0)) / 255;
|
|
||||||
for (size_t i = 0; i < dimensions; i++) {
|
|
||||||
out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
|
|
||||||
}
|
|
||||||
} else if (argc == 3) {
|
|
||||||
// f32 * minVector, maxVector;
|
|
||||||
// size_t d;
|
|
||||||
// fvec_cleanup minCleanup, maxCleanup;
|
|
||||||
// int rc = fvec_from_value(argv[1], )
|
|
||||||
|
|
||||||
sqlite3_free(out);
|
sqlite3_free(out);
|
||||||
// TODO
|
|
||||||
sqlite3_result_error(
|
|
||||||
context, "ranges parameter not supported in vec_quantize_i8 yet.", -1);
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
f32 step = (1.0 - (-1.0)) / 255;
|
||||||
|
for (size_t i = 0; i < dimensions; i++) {
|
||||||
|
out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
|
||||||
|
}
|
||||||
|
|
||||||
sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
|
sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
|
||||||
sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
|
sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_INT8);
|
||||||
|
|
@ -1140,58 +1221,6 @@ cleanup:
|
||||||
srcCleanup(srcVector);
|
srcCleanup(srcVector);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vec_quantize_binary(sqlite3_context *context, int argc,
|
|
||||||
sqlite3_value **argv) {
|
|
||||||
assert(argc == 1);
|
|
||||||
void *vector;
|
|
||||||
size_t dimensions;
|
|
||||||
vector_cleanup cleanup;
|
|
||||||
char *pzError;
|
|
||||||
enum VectorElementType elementType;
|
|
||||||
int rc = vector_from_value(argv[0], &vector, &dimensions, &elementType,
|
|
||||||
&cleanup, &pzError);
|
|
||||||
if (rc != SQLITE_OK) {
|
|
||||||
sqlite3_result_error(context, pzError, -1);
|
|
||||||
sqlite3_free(pzError);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (elementType == SQLITE_VEC_ELEMENT_TYPE_FLOAT32) {
|
|
||||||
int sz = dimensions / CHAR_BIT;
|
|
||||||
u8 *out = sqlite3_malloc(sz);
|
|
||||||
if (!out) {
|
|
||||||
cleanup(vector);
|
|
||||||
sqlite3_result_error_code(context, SQLITE_NOMEM);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
memset(out, 0, sz);
|
|
||||||
for (size_t i = 0; i < dimensions; i++) {
|
|
||||||
int res = ((f32 *)vector)[i] > 0.0;
|
|
||||||
out[i / 8] |= (res << (i % 8));
|
|
||||||
}
|
|
||||||
sqlite3_result_blob(context, out, dimensions / CHAR_BIT, sqlite3_free);
|
|
||||||
sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
|
|
||||||
} else if (elementType == SQLITE_VEC_ELEMENT_TYPE_INT8) {
|
|
||||||
int sz = dimensions / CHAR_BIT;
|
|
||||||
u8 *out = sqlite3_malloc(sz);
|
|
||||||
if (!out) {
|
|
||||||
cleanup(vector);
|
|
||||||
sqlite3_result_error_code(context, SQLITE_NOMEM);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
memset(out, 0, sz);
|
|
||||||
for (size_t i = 0; i < dimensions; i++) {
|
|
||||||
int res = ((i8 *)vector)[i] > 0;
|
|
||||||
out[i / 8] |= (res << (i % 8));
|
|
||||||
}
|
|
||||||
sqlite3_result_blob(context, out, dimensions / CHAR_BIT, sqlite3_free);
|
|
||||||
sqlite3_result_subtype(context, SQLITE_VEC_ELEMENT_TYPE_BIT);
|
|
||||||
} else {
|
|
||||||
sqlite3_result_error(context,
|
|
||||||
"Can only binary quantize float or int8 vectors", -1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
|
static void vec_add(sqlite3_context *context, int argc, sqlite3_value **argv) {
|
||||||
assert(argc == 2);
|
assert(argc == 2);
|
||||||
|
|
@ -2778,7 +2807,7 @@ static int vec_npy_eachColumnBuffer(vec_npy_each_cursor *pCur,
|
||||||
}
|
}
|
||||||
case SQLITE_VEC_ELEMENT_TYPE_INT8:
|
case SQLITE_VEC_ELEMENT_TYPE_INT8:
|
||||||
case SQLITE_VEC_ELEMENT_TYPE_BIT: {
|
case SQLITE_VEC_ELEMENT_TYPE_BIT: {
|
||||||
// TODO
|
// https://github.com/asg017/sqlite-vec/issues/42
|
||||||
sqlite3_result_error(context,
|
sqlite3_result_error(context,
|
||||||
"vec_npy_each only supports float32 vectors", -1);
|
"vec_npy_each only supports float32 vectors", -1);
|
||||||
break;
|
break;
|
||||||
|
|
@ -2806,7 +2835,7 @@ static int vec_npy_eachColumnFile(vec_npy_each_cursor *pCur,
|
||||||
}
|
}
|
||||||
case SQLITE_VEC_ELEMENT_TYPE_INT8:
|
case SQLITE_VEC_ELEMENT_TYPE_INT8:
|
||||||
case SQLITE_VEC_ELEMENT_TYPE_BIT: {
|
case SQLITE_VEC_ELEMENT_TYPE_BIT: {
|
||||||
// TODO
|
// https://github.com/asg017/sqlite-vec/issues/42
|
||||||
sqlite3_result_error(context,
|
sqlite3_result_error(context,
|
||||||
"vec_npy_each only supports float32 vectors", -1);
|
"vec_npy_each only supports float32 vectors", -1);
|
||||||
break;
|
break;
|
||||||
|
|
@ -5902,13 +5931,13 @@ static sqlite3_module vec0Module = {
|
||||||
/* xCommit */ 0,
|
/* xCommit */ 0,
|
||||||
/* xRollback */ 0,
|
/* xRollback */ 0,
|
||||||
/* xFindFunction */ 0,
|
/* xFindFunction */ 0,
|
||||||
/* xRename */ 0, // TODO
|
/* xRename */ 0, // https://github.com/asg017/sqlite-vec/issues/43
|
||||||
/* xSavepoint */ 0,
|
/* xSavepoint */ 0,
|
||||||
/* xRelease */ 0,
|
/* xRelease */ 0,
|
||||||
/* xRollbackTo */ 0,
|
/* xRollbackTo */ 0,
|
||||||
/* xShadowName */ vec0ShadowName,
|
/* xShadowName */ vec0ShadowName,
|
||||||
#if SQLITE_VERSION_NUMBER >= 3044000
|
#if SQLITE_VERSION_NUMBER >= 3044000
|
||||||
/* xIntegrity */ 0, // TODO
|
/* xIntegrity */ 0, // https://github.com/asg017/sqlite-vec/issues/44
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
@ -6661,6 +6690,7 @@ __declspec(dllexport)
|
||||||
{"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
|
{"vec_distance_hamming",vec_distance_hamming, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
|
||||||
{"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
|
{"vec_distance_cosine", vec_distance_cosine, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
|
||||||
{"vec_length", vec_length, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
|
{"vec_length", vec_length, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE, },
|
||||||
|
{"vec_type", vec_type, 1, DEFAULT_FLAGS, },
|
||||||
{"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_to_json", vec_to_json, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_add", vec_add, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_add", vec_add, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_sub", vec_sub, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_sub", vec_sub, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
|
|
@ -6669,8 +6699,7 @@ __declspec(dllexport)
|
||||||
{"vec_f32", vec_f32, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_f32", vec_f32, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_bit", vec_bit, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_bit", vec_bit, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_int8", vec_int8, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_int8", vec_int8, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_quantize_i8", vec_quantize_i8, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_quantize_int8", vec_quantize_int8, 2, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_quantize_i8", vec_quantize_i8, 3, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
|
||||||
{"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
{"vec_quantize_binary", vec_quantize_binary, 1, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE, },
|
||||||
{"vec_static_blob_from_raw", vec_static_blob_from_raw, 4, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE },
|
{"vec_static_blob_from_raw", vec_static_blob_from_raw, 4, DEFAULT_FLAGS | SQLITE_SUBTYPE | SQLITE_RESULT_SUBTYPE },
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
|
||||||
|
|
@ -110,12 +110,12 @@ FUNCTIONS = [
|
||||||
"vec_length",
|
"vec_length",
|
||||||
"vec_normalize",
|
"vec_normalize",
|
||||||
"vec_quantize_binary",
|
"vec_quantize_binary",
|
||||||
"vec_quantize_i8",
|
"vec_quantize_int8",
|
||||||
"vec_quantize_i8",
|
|
||||||
"vec_slice",
|
"vec_slice",
|
||||||
"vec_static_blob_from_raw",
|
"vec_static_blob_from_raw",
|
||||||
"vec_sub",
|
"vec_sub",
|
||||||
"vec_to_json",
|
"vec_to_json",
|
||||||
|
"vec_type",
|
||||||
"vec_version",
|
"vec_version",
|
||||||
]
|
]
|
||||||
MODULES = [
|
MODULES = [
|
||||||
|
|
@ -448,6 +448,20 @@ def test_vec_slice():
|
||||||
vec_slice(b"\xab\xab\xab\xab", 0, 0)
|
vec_slice(b"\xab\xab\xab\xab", 0, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_vec_type():
    """vec_type() reports the element type name and rejects bad input."""

    def vec_type(*args, a="?"):
        # `a` is the SQL expression placed inside vec_type(...); args are
        # bound to its `?` placeholders.
        row = db.execute(f"select vec_type({a})", args).fetchone()
        return row[0]

    # (bound value, SQL expression inside vec_type(...), expected type name)
    cases = [
        ("[1]", "?", "float32"),
        (b"\xaa\xbb\xcc\xdd", "?", "float32"),
        ("[1]", "vec_f32(?)", "float32"),
        ("[1]", "vec_int8(?)", "int8"),
        (b"\xaa", "vec_bit(?)", "bit"),
    ]
    for value, expr, expected in cases:
        assert vec_type(value, a=expr) == expected

    with _raises("invalid float32 vector"):
        vec_type(b"\xaa")
    with _raises("found NULL"):
        vec_type(None)
|
||||||
|
|
||||||
|
|
||||||
def test_vec_add():
|
def test_vec_add():
|
||||||
vec_add = lambda *args, a="?", b="?": db.execute(
|
vec_add = lambda *args, a="?", b="?": db.execute(
|
||||||
f"select vec_add({a}, {b})", args
|
f"select vec_add({a}, {b})", args
|
||||||
|
|
@ -517,11 +531,11 @@ def test_vec_to_json():
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="TODO")
|
@pytest.mark.skip(reason="TODO")
|
||||||
def test_vec_quantize_i8():
|
def test_vec_quantize_int8():
|
||||||
vec_quantize_i8 = lambda *args: db.execute(
|
vec_quantize_int8 = lambda *args: db.execute(
|
||||||
"select vec_quantize_i8()", args
|
"select vec_quantize_int8()", args
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
assert vec_quantize_i8() == 111
|
assert vec_quantize_int8() == 111
|
||||||
|
|
||||||
|
|
||||||
def test_vec_quantize_binary():
|
def test_vec_quantize_binary():
|
||||||
|
|
@ -1020,9 +1034,9 @@ def test_vec0_updates():
|
||||||
db.execute(
|
db.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO t3 VALUES
|
INSERT INTO t3 VALUES
|
||||||
(1, :x, vec_quantize_i8(:x, 'unit') ,vec_quantize_binary(:x)),
|
(1, :x, vec_quantize_int8(:x, 'unit') ,vec_quantize_binary(:x)),
|
||||||
(2, :y, vec_quantize_i8(:y, 'unit') ,vec_quantize_binary(:y)),
|
(2, :y, vec_quantize_int8(:y, 'unit') ,vec_quantize_binary(:y)),
|
||||||
(3, :z, vec_quantize_i8(:z, 'unit') ,vec_quantize_binary(:z));
|
(3, :z, vec_quantize_int8(:z, 'unit') ,vec_quantize_binary(:z));
|
||||||
""",
|
""",
|
||||||
{
|
{
|
||||||
"x": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]",
|
"x": "[.1, .1, .1, .1, -.1, -.1, -.1, -.1]",
|
||||||
|
|
@ -1795,7 +1809,7 @@ def test_vec0_knn():
|
||||||
db.executemany(
|
db.executemany(
|
||||||
"""
|
"""
|
||||||
INSERT INTO v VALUES
|
INSERT INTO v VALUES
|
||||||
(:id, :vector, vec_quantize_i8(:vector, 'unit') ,vec_quantize_binary(:vector));
|
(:id, :vector, vec_quantize_int8(:vector, 'unit') ,vec_quantize_binary(:vector));
|
||||||
""",
|
""",
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue