This commit is contained in:
Alex Garcia 2024-07-31 12:55:03 -07:00
parent 4febdff11a
commit 356f75cca7
17 changed files with 350 additions and 166 deletions

View file

@ -1,6 +1,6 @@
import { defineConfig, DefaultTheme, HeadConfig } from "vitepress"; import { DefaultTheme, defineConfig, HeadConfig } from "vitepress";
import { readFileSync } from "node:fs"; import { readFileSync } from "node:fs";
import { join, dirname } from "node:path"; import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
const PROJECT = "sqlite-vec"; const PROJECT = "sqlite-vec";
@ -105,8 +105,12 @@ function nav(): DefaultTheme.NavItem[] {
link: `https://crates.io/crates/${PROJECT}`, link: `https://crates.io/crates/${PROJECT}`,
}, },
{ {
text: "Golang: Go module", text: "Golang: Go module (CGO)",
link: `https://pkg.go.dev/github.com/asg017/${PROJECT}/bindings/go/cgo`, link: `https://pkg.go.dev/github.com/asg017/${PROJECT}-go-bindings/cgo`,
},
{
text: "Golang: Go module (WASM ncruces)",
link: `https://pkg.go.dev/github.com/asg017/${PROJECT}-go-bindings/ncruces`,
}, },
{ {
text: "Datasette: Plugin", text: "Datasette: Plugin",
@ -139,16 +143,7 @@ function sidebar(): DefaultTheme.SidebarItem[] {
}, },
], ],
}, },
{
text: "Features",
collapsed: true,
items: [
{ text: "Vector formats", link: "/vector-formats" },
{ text: "KNN queries", link: "/knn" },
{ text: "vec0 virtual vables", link: "/vec0" },
{ text: "Static blobs", link: "/numpy" },
],
},
{ {
text: "Using with...", text: "Using with...",
collapsed: true, collapsed: true,
@ -159,11 +154,21 @@ function sidebar(): DefaultTheme.SidebarItem[] {
{ text: "Rust", link: "/rust" }, { text: "Rust", link: "/rust" },
{ text: "Go", link: "/go" }, { text: "Go", link: "/go" },
{ text: "C/C++", link: "/c" }, { text: "C/C++", link: "/c" },
{ text: "WebAssembly (Browser)", link: "/wasm" }, { text: "Browser (WASM)", link: "/wasm" },
{ text: "Datasette", link: "/datasette" }, { text: "Datasette", link: "/datasette" },
{ text: "sqlite-utils", link: "/sqlite-utils" }, { text: "sqlite-utils", link: "/sqlite-utils" },
], ],
}, },
{
text: "Features",
collapsed: true,
items: [
{ text: "Vector formats", link: "/features/vector-formats" },
{ text: "KNN queries", link: "/features/knn" },
{ text: "vec0 virtual vables", link: "/features/vec0" },
{ text: "Static blobs", link: "/features/static-blobs" },
],
},
guides, guides,
{ {
text: "Documentation", text: "Documentation",
@ -172,10 +177,6 @@ function sidebar(): DefaultTheme.SidebarItem[] {
{ text: "API Reference", link: "/api-reference" }, { text: "API Reference", link: "/api-reference" },
], ],
}, },
{
text: "Sponsors",
link: "/sponsors",
},
{ {
text: "See also", text: "See also",
items: [ items: [

View file

@ -639,7 +639,7 @@ Returns a version string of the current `sqlite-vec` installation.
```sql ```sql
select vec_version(); select vec_version();
-- 'v0.0.1-alpha.36' -- 'v0.0.1-alpha.37'
``` ```
@ -651,9 +651,9 @@ Returns debugging information of the current `sqlite-vec` installation.
```sql ```sql
select vec_debug(); select vec_debug();
/* /*
'Version: v0.0.1-alpha.36 'Version: v0.0.1-alpha.37
Date: 2024-07-16T23:06:41Z-0700 Date: 2024-07-23T14:09:43Z-0700
Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d Commit: 77f9b0374c8129056b344854de2dff6b103e5729
Build flags: avx ' Build flags: avx '
*/ */

View file

@ -1,7 +1,7 @@
import Database from "better-sqlite3"; import Database from "better-sqlite3";
import { load } from "js-yaml"; import { load } from "js-yaml";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import { resolve, dirname } from "node:path"; import { dirname, resolve } from "node:path";
import { readFileSync, writeFileSync } from "node:fs"; import { readFileSync, writeFileSync } from "node:fs";
import * as v from "valibot"; import * as v from "valibot";
import { table } from "table"; import { table } from "table";
@ -24,11 +24,11 @@ sqlite-vec is pre-v1, so expect breaking changes.
const REF_PATH = resolve( const REF_PATH = resolve(
dirname(fileURLToPath(import.meta.url)), dirname(fileURLToPath(import.meta.url)),
"../reference.yaml" "../reference.yaml",
); );
const EXT_PATH = resolve( const EXT_PATH = resolve(
dirname(fileURLToPath(import.meta.url)), dirname(fileURLToPath(import.meta.url)),
"../dist/vec0" "../dist/vec0",
); );
const DocSchema = v.objectWithRest( const DocSchema = v.objectWithRest(
@ -38,7 +38,7 @@ const DocSchema = v.objectWithRest(
v.object({ v.object({
title: v.string(), title: v.string(),
desc: v.string(), desc: v.string(),
}) }),
), ),
}, },
v.record( v.record(
@ -47,8 +47,8 @@ const DocSchema = v.objectWithRest(
params: v.array(v.string()), params: v.array(v.string()),
desc: v.string(), desc: v.string(),
example: v.union([v.string(), v.array(v.string())]), example: v.union([v.string(), v.array(v.string())]),
}) }),
) ),
); );
const tableConfig = { const tableConfig = {
@ -92,9 +92,10 @@ function formatSingleValue(value) {
s += "'"; s += "'";
return `-- ${s}`; return `-- ${s}`;
} }
if (typeof value === "object" || Array.isArray(value)) if (typeof value === "object" || Array.isArray(value)) {
return "-- " + JSON.stringify(value, null, 2); return "-- " + JSON.stringify(value, null, 2);
} }
}
function formatValue(value) { function formatValue(value) {
if (typeof value === "string") return `'${value}'`; if (typeof value === "string") return `'${value}'`;
if (typeof value === "number") return value; if (typeof value === "number") return value;
@ -107,9 +108,10 @@ function formatValue(value) {
s += "'"; s += "'";
return s; return s;
} }
if (typeof value === "object" || Array.isArray(value)) if (typeof value === "object" || Array.isArray(value)) {
return JSON.stringify(value, null, 2); return JSON.stringify(value, null, 2);
} }
}
function tableize(stmt, results) { function tableize(stmt, results) {
const columnNames = stmt.columns().map((c) => c.name); const columnNames = stmt.columns().map((c) => c.name);
const rows = results.map((row) => const rows = results.map((row) =>
@ -159,8 +161,7 @@ function renderExamples(db, name, example) {
continue; continue;
} }
const result = const result = results.length > 1 || stmt.columns().length > 1
results.length > 1 || stmt.columns().length > 1
? `/*\n${tableize(stmt, results)}\n*/\n` ? `/*\n${tableize(stmt, results)}\n*/\n`
: formatSingleValue(results[0][0]); : formatSingleValue(results[0][0]);
md += result + "\n\n"; md += result + "\n\n";
@ -182,9 +183,11 @@ for (const section in doc.sections) {
md += doc.sections[section].desc; md += doc.sections[section].desc;
md += "\n\n"; md += "\n\n";
for (const [name, { params, desc, example }] of Object.entries( for (
doc[section] const [name, { params, desc, example }] of Object.entries(
)) { doc[section],
)
) {
const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`; const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`;
md += "### " + headerText + "\n\n"; md += "### " + headerText + "\n\n";

View file

@ -0,0 +1,27 @@
# Compiling `sqlite-vec`
## From Source
```bash
git clone https://github.com/asg017/sqlite-vec
cd sqlite-vec
make loadable
```
`dist/vec0`
## From the amalgamation build
```
https://github.com/asg017/sqlite-vec/releases/download/latest/TODO
```
## Compile-time options
SQLITE_VEC_ENABLE_AVX
SQLITE_VEC_ENABLE_NEON
SQLITE_VEC_OMIT_FS

15
site/features/knn.md Normal file
View file

@ -0,0 +1,15 @@
# KNN queries
## `vec0` virtual tables
## Manually with `vec_distance_l2()`
```sql
create table items(
contents text,
contents_embedding float[768] (check vec_f32(contents_embedding))
);
```
## Static Blobs

View file

@ -31,8 +31,11 @@ gem install sqlite-vec
cargo add sqlite-vec cargo add sqlite-vec
``` ```
```bash [Go] ```bash [Go (CGO)]
go get -u github.com/asg017/sqlite-vec/bindings/go/cgo go get -u github.com/asg017/sqlite-vec-go-bindings/cgo
```
```bash [Go (ncruces WASM)]
go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces
``` ```
```bash [Datasette] ```bash [Datasette]
@ -45,5 +48,31 @@ sqlite-utils install sqlite-utils-sqlite-vec
::: :::
## Pre-compiled extensions
Alternatively, you can download pre-compiled loadable extensions from the Alternatively, you can download pre-compiled loadable extensions from the
[`sqlite-vec` Github Releases](https://github.com/asg017/sqlite-vec/releases/latest). [`sqlite-vec` Github Releases](https://github.com/asg017/sqlite-vec/releases/latest).
There's also an `install.sh` script that will automatically download the appropriate pre-compiled extension from Github Releases to your machine.
```sh
# yolo
curl -L https://github.com/asg017/sqlite-vec/releases/download/latest/install.sh | sh
```
```sh
# ok lets play it safe
curl -o install.sh -L https://github.com/asg017/sqlite-vec/releases/download/latest/install.sh
# inspect your scripts
cat install.sh
# TODO Test if execute permissions?
./install.sh
```
## Compiling
`sqlite-vec` is a single `sqlite-vec.c` and `sqlite-vec.h`, and can be easily compiled for different platforms, or statically linked into larger applications.
See [*Compiling `sqlite-vec`*](#compiling) for more information.

View file

@ -2,17 +2,17 @@
Matryoshka embeddings are a new class of embedding models introduced in the Matryoshka embeddings are a new class of embedding models introduced in the
TODO-YYY paper [_TODO title_](https://arxiv.org/abs/2205.13147). They allow one TODO-YYY paper [_TODO title_](https://arxiv.org/abs/2205.13147). They allow one
to truncate excess dimensions in large vector, without lossing much quality. to truncate excess dimensions in large vector, without sacrificing much quality.
Let's say your embedding model generates 1024-dimensional vectors. If you have 1 Let's say your embedding model generates 1024-dimensional vectors. If you have 1
million of these 1024-dimensional vectors, they would take up `4.096 GB` of million of these 1024-dimensional vectors, they would take up `4.096 GB` of
space! You're not able to reduce the dimensions without lossing a lot of space! You're not able to reduce the dimensions without losing a lot of
quality - if you were to remove half of the dimensions 512-dimensional vectors, quality - if you were to remove half of the dimensions 512-dimensional vectors,
you could expect to also lose 50% or more of the quality of results. There are you could expect to also lose 50% or more of the quality of results. There are
other dimensional-reduction techniques, like [PCA](#TODO), but this requires a other dimensional-reduction techniques, like [PCA](#TODO) or [Product Quantization](#TODO), but they typically require
complicated and expensive training process. complicated and expensive training processes.
Matryoshka embeddings, on the other hand, _can_ be truncated, without losing Matryoshka embeddings, on the other hand, _can_ be truncated, without losing much
quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim
that that
@ -20,16 +20,20 @@ They are called "Matryoshka" embeddings because ... TODO
## Matryoshka Embeddings with `sqlite-vec` ## Matryoshka Embeddings with `sqlite-vec`
You can use a combination of [`vec_slice()`](/api-reference#vec_slice) and You can use a combination of [`vec_slice()`](../api-reference.md#vec_slice) and
[`vec_normalize()`](/api-reference#vec_normalize) on Matryoshka embeddings to [`vec_normalize()`](../api-reference.md#vec_normalize) on Matryoshka embeddings to
truncate. truncate.
```sql ```sql
select select
vec_normalize(vec_slice(title_embeddings, 0, 256)) as title_embeddings_256d vec_normalize(
vec_slice(title_embeddings, 0, 256)
) as title_embeddings_256d
from vec_articles; from vec_articles;
``` ```
[`vec_slice()`](../api-reference.md#vec_slice) will cut down the vector to the first 256 dimensions. Then [`vec_normalize()`](../api-reference.md#vec_normalize) will normalize that truncated vector, which is typically a required step for Matryoshka embeddings.
## Benchmarks ## Benchmarks
## Supported Models ## Supported Models
@ -47,3 +51,7 @@ https://www.mixedbread.ai/blog/binary-mrl
`mxbai-embed-large-v1`: 1024, 512, 256, 128, 64 `mxbai-embed-large-v1`: 1024, 512, 256, 128, 64
`nomic-embed-text-v1.5`: 768, 512, 256, 128, 64 `nomic-embed-text-v1.5`: 768, 512, 256, 128, 64
```
# TODO new snowflake model
```

View file

@ -1,12 +1,12 @@
import { readFileSync } from "node:fs"; import { readFileSync } from "node:fs";
import { join, dirname } from "node:path"; import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
const PROJECT = "sqlite-vec"; const PROJECT = "sqlite-vec";
const VERSION = readFileSync( const VERSION = readFileSync(
join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"), join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"),
"utf8" "utf8",
); );
export default { export default {

View file

@ -1,5 +1,7 @@
# Using `sqlite-vec` in Datasette # Using `sqlite-vec` in Datasette
[![Datasette](https://img.shields.io/pypi/v/datasette-sqlite-vec.svg?color=B6B6D9&label=Datasette+plugin&logoColor=white&logo=python)](https://datasette.io/plugins/datasette-sqlite-vec)
```bash ```bash
datasette install datasette-sqlite-vec datasette install datasette-sqlite-vec
``` ```

View file

@ -1,5 +1,7 @@
# Using `sqlite-vec` in Go # Using `sqlite-vec` in Go
[![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-vec-go-bindings/cgo.svg)](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/cgo) [![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-vec-go-bindings/ncruces.svg)](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/ncruces)
There are two ways you can embed `sqlite-vec` into Go applications: a CGO option There are two ways you can embed `sqlite-vec` into Go applications: a CGO option
for libraries like for libraries like
[`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3), or a [`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3), or a
@ -8,14 +10,87 @@ WASM-based option with
## Option 1: CGO ## Option 1: CGO
If using [`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3) or another CGO-based SQLite library, then use the `github.com/asg017/sqlite-vec-go-bindings/cgo` module to embed `sqlite-vec` into your Go application.
```bash ```bash
go get -u github.com/asg017/sqlite-vec/bindings/go/cgo go get -u github.com/asg017/sqlite-vec-go-bindings/cgo
```
This will compile and statically link `sqlite-vec` into your project. The initial build will be slow, but later builds will be cached and much faster.
Use `sqlite_vec.Auto()` to enable `sqlite-vec` functions in all future database connections. Also `sqlite_vec.Cancel()` is available to undo `Auto()`.
```go
package main
import (
"database/sql"
"log"
sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo"
_ "github.com/mattn/go-sqlite3"
)
func main() {
sqlite_vec.Auto()
db, err := sql.Open("sqlite3", ":memory:")
if err != nil {
log.Fatal(err)
}
defer db.Close()
var vecVersion string
err = db.QueryRow("select vec_version()").Scan(&vecVersion)
if err != nil {
log.Fatal(err)
}
log.Printf("vec_version=%s\n", vecVersion)
}
``` ```
## Option 2: WASM based with `ncruces/go-sqlite3` ## Option 2: WASM based with `ncruces/go-sqlite3`
``` [`github.com/ncruces/go-sqlite3`](https://github.com/ncruces/go-sqlite3) is an alternative SQLite Go driver that avoids CGO by using a custom WASM build of SQLite. To use `sqlite-vec` from this library, use the special WASM binary provided in `github.com/asg017/sqlite-vec-go-bindings/ncruces`.
go
```bash
go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces
``` ```
```go
package main
import (
_ "embed"
"log"
_ "github.com/asg017/sqlite-vec-go-bindings/ncruces"
"github.com/ncruces/go-sqlite3"
)
func main() {
db, err := sqlite3.Open(":memory:")
if err != nil {
log.Fatal(err)
}
stmt, _, err := db.Prepare(`SELECT sqlite_version(), vec_version()`)
if err != nil {
log.Fatal(err)
}
stmt.Step()
log.Printf("vec_version=%s\n", stmt.ColumnText(1))
stmt.Close()
}
```
The `github.com/asg017/sqlite-vec-go-bindings/ncruces` package embeds a custom WASM build of SQLite, so there's no need to use `github.com/ncruces/go-sqlite3/embed`.
## Working with vectors in Go ## Working with vectors in Go
If vectors are provided as a list of floats, use `SerializeFloat32(list)` to serialize them into the compact BLOB format that `sqlite-vec` expects.
```go
TODO
```

View file

@ -56,41 +56,48 @@ accessor to bind as a parameter to `sqlite-vec` SQL functions.
```js ```js
// TODO // TODO
const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]);
const stmt = db.prepare("INSERT INTO vss_demo VALUES (?)"); const stmt = db.prepare("select vec_length(?)");
stmt.run(embedding.buffer); console.log(stmt.run(embedding.buffer));
``` ```
## Node.js ## Node.js
Here's a quick recipe of using `sqlite-vec` with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) in Node.js. Here's a quick recipe of using `sqlite-vec` with
[`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) in Node.js.
```js ```js
``` ```
See [`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs) See
[`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs)
for a more complete Node.js demo. for a more complete Node.js demo.
## Deno ## Deno
Here's a quick recipe of using `sqlite-vec` with [`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. It will only work on Deno version `1.44` or greater, because of a bug in previous Deno versions. Here's a quick recipe of using `sqlite-vec` with
[`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. It will only work on Deno
version `1.44` or greater, because of a bug in previous Deno versions.
Keep in mind, the `better-sqlite3` example above also works in Deno, you just need to prefix the `better-sqlite3` import with `npm:`, like `import * from "npm:better-sqlite3"`. Keep in mind, the `better-sqlite3` example above also works in Deno, you just
need to prefix the `better-sqlite3` import with `npm:`, like
`import * from "npm:better-sqlite3"`.
```ts ```ts
``` ```
See [`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts) See
[`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts)
for a more complete Deno demo. for a more complete Deno demo.
## Bun ## Bun
Here's a quick recipe of using `sqlite-vec` with [`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. The `better-sqlite3` example above also works with Bun. Here's a quick recipe of using `sqlite-vec` with
[`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. The `better-sqlite3`
example above also works with Bun.
```ts ```ts
``` ```
See [`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts) See
[`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts)
for a more complete Bun demo. for a more complete Bun demo.

View file

@ -34,126 +34,77 @@ print(f"vec_version={vec_version}")
### Lists ### Lists
If the vectors you are working with are provided as a list of floats, you can convert them into the compact BLOB format that `sqlite-vec` uses with [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack). If your vectors in Python are provided as a list of floats, you can
convert them into the compact BLOB format that `sqlite-vec` uses with
`serialize_float32()`. This will internally call [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack).
```python ```python
import struct from sqlite_vec import serialize_float32
def serialize(vector: List[float]) -> bytes:
""" serializes a list of floats into a compact "raw bytes" format """
return struct.pack('%sf' % len(vector), *vector)
embedding = [0.1, 0.2, 0.3, 0.4] embedding = [0.1, 0.2, 0.3, 0.4]
result = db.execute('select vec_length(?)', [serialize(embedding)]).fetchone()[0] result = db.execute('select vec_length(?)', [serialize_float32(embedding)])
print(result) # 4 print(result.fetchone()[0]) # 4
``` ```
### NumPy Arrays ### NumPy Arrays
If your vectors are from `numpy` arrays, the Python SQLite package allows you to pass it along as-is. Make sure you convert your array elements to 32-bit floats with [`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), as some embedding services will use `np.float64` elements. If your vectors are NumPy arrays, the Python SQLite package allows you to
pass it along as-is, since NumPy arrays implement [the Buffer protocol](https://docs.python.org/3/c-api/buffer.html). Make sure you cast your array elements to 32-bit floats
with
[`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html),
as some embeddings will use `np.float64`.
```python ```python
import numpy as np import numpy as np
import sqlite3
import sqlite_vec
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)
db.execute("CREATE VIRTUAL TABLE vec_demo(sample_embedding float[4])")
embedding = np.array([0.1, 0.2, 0.3, 0.4]) embedding = np.array([0.1, 0.2, 0.3, 0.4])
db.execute( db.execute(
"INSERT INTO vec_demo(sample_embedding) VALUES (?)", [embedding.astype(np.float32)] "SELECT vec_length(?)", [embedding.astype(np.float32)]
) ) # 4
``` ```
## Recipes
### OpenAI
https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python
TODO
```python
from openai import OpenAI
import sqlite3
import sqlite_vec
texts = [
'Capri-Sun is a brand of juice concentratebased drinks manufactured by the German company Wild and regional licensees.',
'Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.',
'George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.',
'Alan Mathison Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.',
'Alaqua Cox is a Native American (Menominee) actress.'
]
# change ':memory:' to a filepath to persist data
db = sqlite3.connect(':memory:')
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)
client = OpenAI()
response = client.embeddings.create(
input=[texts],
model="text-embedding-3-small"
)
print(response.data[0].embedding)
```
### llamafile
https://github.com/Mozilla-Ocho/llamafile
TODO
### llama-cpp-python
https://github.com/abetlen/llama-cpp-python
TODO
### sentence-transformers (etc.)
https://github.com/UKPLab/sentence-transformers
TODO
## Using an up-to-date version of SQLite ## Using an up-to-date version of SQLite
Some features of `sqlite-vec` will require an up-to-date SQLite library. You can see what version of SQLite your Python environment uses with [`sqlite3.sqlite-version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), or with this one-line command: Some features of `sqlite-vec` will require an up-to-date SQLite library. You can
see what version of SQLite your Python environment uses with
[`sqlite3.sqlite_version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version),
or with this one-line command:
```bash ```bash
python -c 'import sqlite3; print(sqlite3.sqlite_version)' python -c 'import sqlite3; print(sqlite3.sqlite_version)'
``` ```
Currently, **SQLite version 3.41 or higher** is recommended but not required. `sqlite-vec` will work with older version, but certain features and queries will only work correctly in >=3.41. Currently, **SQLite version 3.41 or higher** is recommended but not required.
`sqlite-vec` will work with older versions, but certain features and queries will
only work correctly in >=3.41.
To "upgrade" the SQLite version your Python installation uses, you have a few options. To "upgrade" the SQLite version your Python installation uses, you have a few
options.
### Compile your own SQLite version ### Compile your own SQLite version
You can compile an up-to-date version of SQLite and use some system environment variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a different SQLite library. [This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) goes into this approach in more details. You can compile an up-to-date version of SQLite and use some system environment
variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a
different SQLite library.
[This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python)
goes into this approach in more details.
Although compiling SQLite can be straightforward, there are a lot of different compilation options to consider, which makes it confusing. This also doesn't work with Windows, which statically compiles its own SQLite library. Although compiling SQLite can be straightforward, there are a lot of different
compilation options to consider, which makes it confusing. This also doesn't
work with Windows, which statically compiles its own SQLite library.
### Use `pysqlite3` ### Use `pysqlite3`
[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPi package that bundles an up-to-date SQLite library as a separate pip package. [`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPi package
that bundles an up-to-date SQLite library as a separate pip package.
While it's mostly compatible with the Python `sqlite3` module, there are a few rare edge cases where the APIs don't match. While it's mostly compatible with the Python `sqlite3` module, there are a few
rare edge cases where the APIs don't match.
### Upgrading your Python version ### Upgrading your Python version
Sometimes installing a latest version of Python will "magically" upgrade your SQLite version as well. This is a nuclear option, as upgrading Python installations can be quite the hassle, but most Python 3.12 builds will have a very recent SQLite version. Sometimes installing a latest version of Python will "magically" upgrade your
SQLite version as well. This is a nuclear option, as upgrading Python
installations can be quite the hassle, but most Python 3.12 builds will have a
very recent SQLite version.

View file

@ -1,9 +1,37 @@
# Using `sqlite-vec` in Ruby # Using `sqlite-vec` in Ruby
https://rubygems.org/gems/sqlite-vec ![Gem](https://img.shields.io/gem/v/sqlite-vec?color=red&logo=rubygems&logoColor=white)
Ruby developers can use `sqlite-vec` with the [`sqlite-vec` Gem](https://rubygems.org/gems/sqlite-vec).
```bash ```bash
gem install sqlite-vec gem install sqlite-vec
``` ```
You can then use `SqliteVec.load()` to load `sqlite-vec` SQL functions in a given SQLite connection.
```ruby
require 'sqlite3'
require 'sqlite_vec'
db = SQLite3::Database.new(':memory:')
db.enable_load_extension(true)
SqliteVec.load(db)
db.enable_load_extension(false)
result = db.execute('SELECT vec_version()')
puts result.first.first
```
## Working with vectors in Ruby ## Working with vectors in Ruby
If your embeddings are provided as a list of numbers, use `.pack("f*")` to convert them into the compact BLOB format that `sqlite-vec` uses.
```ruby
embedding = [0.1, 0.2, 0.3, 0.4]
result = db.execute("SELECT vec_length(?)", [embedding.pack("f*")])
puts result.first.first # 4
```

View file

@ -1,4 +1,5 @@
# Using `sqlite-vec` in Rust # Using `sqlite-vec` in Rust
[![Crates.io](https://img.shields.io/crates/v/sqlite-vec?logo=rust)](https://crates.io/crates/sqlite-vec)
You can embed `sqlite-vec` into your Rust projects using the official You can embed `sqlite-vec` into your Rust projects using the official
[`sqlite-vec` crate](https://crates.io/crates/sqlite-vec). [`sqlite-vec` crate](https://crates.io/crates/sqlite-vec).
@ -18,16 +19,29 @@ SQLite library's `sqlite3_auto_extension()` function. Here's an example with
```rs ```rs
use sqlite_vec::sqlite3_vec_init; use sqlite_vec::sqlite3_vec_init;
use rusqlite::{ffi::sqlite3_auto_extension}; use rusqlite::{ffi::sqlite3_auto_extension, Result};
fn main() { fn main()-> Result<()> {
unsafe { unsafe {
sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ()))); sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ())));
} }
// future database connection will now automatically include sqlite-vec functions! // future database connection will now automatically include sqlite-vec functions!
let db = Connection::open_in_memory()?;
let vec_version: String = db.query_row("select vec_version()", &[v.as_bytes()], |x| x.get(0)?)?;
println!("vec_version={vec_version}");
Ok(())
} }
``` ```
A full [`sqlite-vec` Rust demo](#TODO) is also available. A full [`sqlite-vec` Rust demo](#TODO) is also available.
## Working with vectors in Rust ## Working with vectors in Rust
If your vectors are provided as a `Vec<f32>` type, the [`zerocopy` crate](https://crates.io/crates/zerocopy) is recommended, specifically `zerocopy::AsBytes`. This will allow you to pass in vectors into `sqlite-vec` without any copying.
```rs
let query: Vec<f32> = vec![0.1, 0.2, 0.3, 0.4];
let mut stmt = db.prepare("SELECT vec_length(?)")?;
stmt.execute(&[query.as_bytes()])?;
```

View file

@ -1,5 +1,7 @@
# Using `sqlite-vec` in `sqlite-utils` # Using `sqlite-vec` in `sqlite-utils`
![sqlite-utils](https://img.shields.io/pypi/v/sqlite-utils-sqlite-vec.svg?color=B6B6D9&label=sqlite-utils+plugin&logoColor=white&logo=python)
```bash ```bash
sqlite-utils install sqlite-utils-sqlite-vec sqlite-utils install sqlite-utils-sqlite-vec
``` ```

View file

@ -0,0 +1,17 @@
# `sqlite-vec` in the Browser with WebAssembly
```html
<html>
<body>
<script type="module">
import {default as init} from "https://cdn.jsdelivr.net/npm/sqlite-vec-wasm-demo@latest/sqlite3.mjs";
const sqlite3 = await init();
const db = new sqlite3.oo1.DB(":memory:");
const [sqlite_version, vec_version] = db.selectArray('select sqlite_version(), vec_version();')
console.log(`vec_version=${vec_version}`);
</script>
</body>
</html>
```

View file

@ -1,10 +1,12 @@
# Semantic Versioning for `sqlite-vec` # Semantic Versioning for `sqlite-vec`
`sqlite-vec` is pre-v1, so according to the rules of [Semantic Versioning](https://semver.org/), `sqlite-vec` is pre-v1, so according to the rules of
"minor" releases like "0.2.0" or "0.3.0" may contain breaking changes. [Semantic Versioning](https://semver.org/), "minor" releases like "0.2.0" or
"0.3.0" may contain breaking changes.
But what exactly counts as a "breaking change" in a SQLite extension? The line isn't so clear, unfortunately. But what exactly counts as a "breaking change" in a SQLite extension? The line
Here are all the surfaces that COULD count as a "breaking change": isn't so clear, unfortunately. Here are all the surfaces that COULD count as
a "breaking change":
- SQL functions and columns on virtual tables - SQL functions and columns on virtual tables
- The C API (extension entrypoints) - The C API (extension entrypoints)
@ -13,7 +15,6 @@ Here are a all the surfaces that COULD count as a "breaking change":
## What counts as a "breaking change"? ## What counts as a "breaking change"?
### Changes to SQL functions ### Changes to SQL functions
- Re-naming or removing an SQL function - Re-naming or removing an SQL function
@ -25,14 +26,13 @@ Here are a all the surfaces that COULD count as a "breaking change":
### Changes to the C API ### Changes to the C API
Currently there is no "official" C API for `sqlite-vec`. However, there are entrypoints defined in C that C developers or developers using FFI can call. Any Currently there is no "official" C API for `sqlite-vec`. However, there are
entrypoints defined in C that C developers or developers using FFI can call. Any
### Compile-time options ### Compile-time options
The removal of any compile time options The removal of any compile time options
## When is `v1.0` coming? ## When is `v1.0` coming?
In a few months! The main problems I want to solve before `v1.0` include: In a few months! The main problems I want to solve before `v1.0` include:
@ -42,8 +42,13 @@ In a few months! The main problems I want to solve before `v1.0` include:
- ANN indexing - ANN indexing
- Quantization + pre-transformations - Quantization + pre-transformations
Once those items are complete, I will likely create a `v1.0` release, along with renaming the `vec0` virtual table module to `vec1`. And if future major releases are required, a `v2.0` major release will be made with new `vec2` virtual tables and so on. Once those items are complete, I will likely create a `v1.0` release, along with
renaming the `vec0` virtual table module to `vec1`. And if future major releases
are required, a `v2.0` major release will be made with new `vec2` virtual
tables and so on.
Ideally, only a `v1` major release would be required. But who knows what the future has in store with vector search! Ideally, only a `v1` major release would be required. But who knows what the
future has in store with vector search!
In general, I will try my best to maximize stability and limit the number of breaking changes for future `sqlite-vec` versions. In general, I will try my best to maximize stability and limit the number of
breaking changes for future `sqlite-vec` versions.