sqlite-vec/reference.yaml

sections:
  constructors:
    title: Constructors
    desc: |
      SQL functions that "construct" vectors with different element types.

      Currently, only `float32`, `int8`, and `bit` vectors are supported.

  op:
    title: Operations
    desc: |
      Different operations and utilities for working with vectors.
  distance:
    title: Distance functions
    desc: Various algorithms to calculate distance between two vectors.
  quantization:
    title: Quantization
    desc: Various techniques to "compress" a vector by reducing precision and accuracy.
  numpy:
    title: "NumPy Utilities"
    desc: Functions to read data from or work with [NumPy arrays](https://numpy.org/doc/stable/reference/generated/numpy.array.html).
  meta:
    title: Meta
    desc: Helper functions to debug `sqlite-vec` installations.
  entrypoints:
    title: Entrypoints
    desc: All the named entrypoints that load in different `sqlite-vec` functions and options.
#  vec0:
#    title: "vec0 Virtual Table"
#    desc: TODO
meta:
  vec_version:
    params: []
    desc: Returns a version string of the current `sqlite-vec` installation.
    example: select vec_version();
  vec_debug:
    params: []
    desc: Returns debugging information of the current `sqlite-vec` installation.
    example: select vec_debug();
constructors:
  vec_f32:
    params: [vector]
    desc: |
      Creates a float vector from a BLOB or JSON text. If a BLOB is provided,
      its length must be divisible by 4, as each float takes up 4 bytes of space.

      The returned value is a BLOB with 4 bytes per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
      of `223`.
    example:
      - select vec_f32('[.1, .2, .3, 4]');
      - select subtype(vec_f32('[.1, .2, .3, 4]'));
      - select vec_f32(X'AABBCCDD');
      - select vec_to_json(vec_f32(X'AABBCCDD'));
      - select vec_f32(X'AA');
  vec_int8:
    params: [vector]
    desc: |
      Creates an 8-bit integer vector from a BLOB or JSON text. If a BLOB is provided,
      every byte is treated as one element, as an 8-bit integer takes up 1 byte of space.
      If JSON text is provided, each element must be an integer between -128 and 127 inclusive.

      The returned value is a BLOB with 1 byte per element, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
      of `225`.

    example:
      - select vec_int8('[1, 2, 3, 4]');
      - select subtype(vec_int8('[1, 2, 3, 4]'));
      - select vec_int8(X'AABBCCDD');
      - select vec_to_json(vec_int8(X'AABBCCDD'));
      - select vec_int8('[999]');

  vec_bit:
    params: [vector]
    desc: |
      Creates a binary vector from a BLOB.

      The returned value is a BLOB with 1 byte per 8 elements, with a special [subtype](https://www.sqlite.org/c3ref/result_subtype.html)
      of `224`.
    example:
      - select vec_bit(X'F0');
      - select subtype(vec_bit(X'F0'));
      - select vec_to_json(vec_bit(X'F0'));
op:
  vec_length:
    params: [vector]
    desc: |
      Returns the number of elements in the given vector.
      The vector can be `JSON`, `BLOB`, or the result of a [constructor function](#constructors).

      This function will return an error if `vector` is invalid.
    example:
      - select vec_length('[.1, .2]');
      - select vec_length(X'AABBCCDD');
      - select vec_length(vec_int8(X'AABBCCDD'));
      - select vec_length(vec_bit(X'AABBCCDD'));
      - select vec_length(X'CCDD');
  vec_type:
    params: [vector]
    desc: |
      Returns the name of the type of `vector` as text. One of `'float32'`, `'int8'`, or `'bit'`.

      This function will return an error if `vector` is invalid.
    example:
      - select vec_type('[.1, .2]');
      - select vec_type(X'AABBCCDD');
      - select vec_type(vec_int8(X'AABBCCDD'));
      - select vec_type(vec_bit(X'AABBCCDD'));
      - select vec_type(X'CCDD');
  vec_add:
    params: [a, b]
    desc: |
      Adds every element in vector `a` with vector `b`, returning a new vector `c`. Both vectors
      must be of the same type and same length. Only `float32` and `int8` vectors are supported.

      An error is raised if either `a` or `b` is invalid, or if they are not the same type or same length.

      See also [`vec_sub()`](#vec_sub).

    example:
      - |
        select vec_add(
          '[.1, .2, .3]',
          '[.4, .5, .6]'
        );
      - |
        select vec_to_json(
          vec_add(
            '[.1, .2, .3]',
            '[.4, .5, .6]'
          )
        );
      - |
        select vec_to_json(
          vec_add(
            vec_int8('[1, 2, 3]'),
            vec_int8('[4, 5, 6]')
          )
        );
      - select vec_add('[.1]', vec_int8('[1]'));
      - select vec_add(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_sub:
    params: [a, b]
    desc: |
      Subtracts every element in vector `b` from vector `a`, returning a new vector `c`. Both vectors
      must be of the same type and same length. Only `float32` and `int8` vectors are supported.

      An error is raised if either `a` or `b` is invalid, or if they are not the same type or same length.

      See also [`vec_add()`](#vec_add).

    example:
      - |
        select vec_sub(
          '[.1, .2, .3]',
          '[.4, .5, .6]'
        );
      - |
        select vec_to_json(
          vec_sub(
            '[.1, .2, .3]',
            '[.4, .5, .6]'
          )
        );
      - |
        select vec_to_json(
          vec_sub(
            vec_int8('[1, 2, 3]'),
            vec_int8('[4, 5, 6]')
          )
        );
      - select vec_sub('[.1]', vec_int8('[1]'));
      - select vec_sub(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_normalize:
    params: [vector]
    desc: |
      Performs L2 normalization on the given vector. Only float32 vectors are currently supported.

      Returns an error if the input is an invalid vector or not a float32 vector.
    example:
      - select vec_normalize('[2, 3, 1, -4]');
      - |
        select vec_to_json(
          vec_normalize('[2, 3, 1, -4]')
        );
      - |
        -- for matryoshka embeddings - slice then normalize
        select vec_to_json(
          vec_normalize(
            vec_slice('[2, 3, 1, -4]', 0, 2)
          )
        );
  vec_slice:
    params: [vector, start, end]
    desc: |
      Extract a subset of `vector` from the `start` element (inclusive) to the `end` element (exclusive). TODO check

      This is especially useful for [Matryoshka embeddings](#TODO), also known as "adaptive length" embeddings.
      Use with [`vec_normalize()`](#vec_normalize) to get proper results.

      Returns an error in the following conditions:
        - If `vector` is not a valid vector
        - If `start` is less than zero or greater than or equal to `end`
        - If `end` is greater than the length of `vector`, or less than or equal to `start`
        - If `vector` is a bitvector and `start` or `end` is not divisible by 8
    example:
      - select vec_slice('[1, 2,3, 4]', 0, 2);
      - |
        select vec_to_json(
          vec_slice('[1, 2,3, 4]', 0, 2)
        );
      - |
        select vec_to_json(
          vec_slice('[1, 2,3, 4]', 2, 4)
        );
      - |
        select vec_to_json(
          vec_slice('[1, 2,3, 4]', -1, 4)
        );
      - |
        select vec_to_json(
          vec_slice('[1, 2,3, 4]', 0, 5)
        );
      - |
        select vec_to_json(
          vec_slice('[1, 2,3, 4]', 0, 0)
        );
  vec_to_json:
    params: [vector]
    desc: |
      Represents a vector as JSON text. The input vector can be a vector BLOB or JSON text.

      Returns an error if `vector` is an invalid vector, or when memory cannot be allocated.
    example:
      - select vec_to_json(X'AABBCCDD');
      - select vec_to_json(vec_int8(X'AABBCCDD'));
      - select vec_to_json(vec_bit(X'AABBCCDD'));
      - select vec_to_json('[1,2,3,4]');
      - select vec_to_json('invalid');
  vec_each:
    params: [vector]
    desc: |
      A table function to iterate through every element in a vector. One row is returned per element in the vector.

      ```sql
      CREATE TABLE vec_each(
        rowid int,    -- The position of the element within the vector
        value,        -- The value of the element at that position
        vector HIDDEN -- input parameter: A well-formed vector value
      )
      ```

      Returns an error if `vector` is not a valid vector.
    example:
      - select rowid, value from vec_each('[1,2,3,4]');
      - select rowid, value from vec_each(X'AABBCCDD00112233');
      - select rowid, value from vec_each(vec_int8(X'AABBCCDD'));
      - select rowid, value from vec_each(vec_bit(X'F0'));

distance:
  vec_distance_L2:
    params: [a, b]
    desc: |
      Calculates the L2 Euclidean distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.

      Returns an error under the following conditions:
      - `a` or `b` are invalid vectors
      - `a` and `b` do not share the same vector element type (e.g. float32 or int8)
      - `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
      - `a` and `b` do not have the same length.
    example:
      - select vec_distance_L2('[1, 1]', '[2, 2]');
      - select vec_distance_L2('[1, 1]', '[-2, -2]');
      - select vec_distance_L2('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
      - select vec_distance_L2(X'AABBCCDD', X'00112233');
      - select vec_distance_L2('[1, 1]', vec_int8('[2, 2]'));
      - select vec_distance_L2(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_distance_cosine:
    params: [a, b]
    desc: |
      Calculates the cosine distance between vectors `a` and `b`. Only valid for float32 or int8 vectors.

      Returns an error under the following conditions:
        - `a` or `b` are invalid vectors
        - `a` and `b` do not share the same vector element type (e.g. float32 or int8)
        - `a` or `b` are bit vectors. Use [`vec_distance_hamming()`](#vec_distance_hamming) for distance calculations between two bitvectors.
        - `a` and `b` do not have the same length.
    example:
      - select vec_distance_cosine('[1, 1]', '[2, 2]');
      - select vec_distance_cosine('[1, 1]', '[-2, -2]');
      - select vec_distance_cosine('[1.1, 2.2, 3.3]', '[4.4, 5.5, 6.6]');
      - select vec_distance_cosine(X'AABBCCDD', X'00112233');
      - select vec_distance_cosine('[1, 1]', vec_int8('[2, 2]'));
      - select vec_distance_cosine(vec_bit(X'AA'), vec_bit(X'BB'));
  vec_distance_hamming:
    params: [a, b]
    desc: |
      Calculates the Hamming distance between two bitvectors `a` and `b`. Only valid for bitvectors.

      Returns an error under the following conditions:
      - `a` or `b` are not bitvectors
      - `a` and `b` do not share the same length
      - Memory cannot be allocated
    example:
      - select vec_distance_hamming(vec_bit(X'00'), vec_bit(X'FF'));
      - select vec_distance_hamming(vec_bit(X'FF'), vec_bit(X'FF'));
      - select vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'44'));
      - select vec_distance_hamming('[1, 1]', '[0, 0]');

quantization:
  vec_quantize_binary:
    params: [vector]
    desc: |
      Quantize a float32 or int8 vector into a bitvector.
      For every element in the vector, a `1` is assigned to positive numbers and a `0` is assigned to negative numbers.
      These values are then packed into a bit vector.

      Returns an error if `vector` is invalid, or if `vector` is not a float32 or int8 vector.
    example:
      - select vec_quantize_binary('[1, 2, 3, 4, 5, 6, 7, 8]');
      - select vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
      - select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
      - select vec_quantize_binary('[-1, -2, -3, -4, -5, -6, -7, -8]');
      - select vec_quantize_binary(vec_int8(X'11223344'));
      - select vec_quantize_binary(vec_bit(X'FF'));
  vec_quantize_i8:
    params: [vector, "[start]", "[end]"]
    desc: x
    example: select 'todo';
vec0:
  vec0:
    params: []
    desc: TODO
    example:
      - |
        create virtual table vec_items using vec0(
          contents_embedding float[4]
        );
      - |
        insert into vec_items(rowid, contents_embedding)
        values (1, '[1, 1, 1, 1]'),
          (2, '[2, 2, 2, 2]'),
          (3, '[3, 3, 3, 3]');