From 356f75cca78b6fad8215e4e50ff23fbb1a7f9fd7 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Wed, 31 Jul 2024 12:55:03 -0700 Subject: [PATCH] docs --- site/.vitepress/config.mts | 39 +++++---- site/api-reference.md | 8 +- site/build-ref.mjs | 33 +++---- site/compiling.md | 27 ++++++ site/features/knn.md | 15 ++++ site/getting-started/installation.md | 33 ++++++- site/guides/matryoshka.md | 24 +++-- site/project.data.ts | 4 +- site/using/datasette.md | 2 + site/using/go.md | 81 ++++++++++++++++- site/using/js.md | 31 ++++--- site/using/python.md | 125 ++++++++------------------- site/using/ruby.md | 30 ++++++- site/using/rust.md | 18 +++- site/using/sqlite-utils.md | 2 + site/using/wasm.md | 17 ++++ site/versioning.md | 27 +++--- 17 files changed, 350 insertions(+), 166 deletions(-) create mode 100644 site/features/knn.md diff --git a/site/.vitepress/config.mts b/site/.vitepress/config.mts index 055a59b..750271e 100644 --- a/site/.vitepress/config.mts +++ b/site/.vitepress/config.mts @@ -1,6 +1,6 @@ -import { defineConfig, DefaultTheme, HeadConfig } from "vitepress"; +import { DefaultTheme, defineConfig, HeadConfig } from "vitepress"; import { readFileSync } from "node:fs"; -import { join, dirname } from "node:path"; +import { dirname, join } from "node:path"; import { fileURLToPath } from "node:url"; const PROJECT = "sqlite-vec"; @@ -105,8 +105,12 @@ function nav(): DefaultTheme.NavItem[] { link: `https://crates.io/crates/${PROJECT}`, }, { - text: "Golang: Go module", - link: `https://pkg.go.dev/github.com/asg017/${PROJECT}/bindings/go/cgo`, + text: "Golang: Go module (CGO)", + link: `https://pkg.go.dev/github.com/asg017/${PROJECT}-go-bindings/cgo`, + }, + { + text: "Golang: Go module (WASM ncruces)", + link: `https://pkg.go.dev/github.com/asg017/${PROJECT}-go-bindings/ncruces`, }, { text: "Datasette: Plugin", @@ -139,16 +143,7 @@ function sidebar(): DefaultTheme.SidebarItem[] { }, ], }, - { - text: "Features", - collapsed: true, - items: [ - { text: "Vector 
formats", link: "/vector-formats" }, - { text: "KNN queries", link: "/knn" }, - { text: "vec0 virtual vables", link: "/vec0" }, - { text: "Static blobs", link: "/numpy" }, - ], - }, + { text: "Using with...", collapsed: true, @@ -159,11 +154,21 @@ function sidebar(): DefaultTheme.SidebarItem[] { { text: "Rust", link: "/rust" }, { text: "Go", link: "/go" }, { text: "C/C++", link: "/c" }, - { text: "WebAssembly (Browser)", link: "/wasm" }, + { text: "Browser (WASM)", link: "/wasm" }, { text: "Datasette", link: "/datasette" }, { text: "sqlite-utils", link: "/sqlite-utils" }, ], }, + { + text: "Features", + collapsed: true, + items: [ + { text: "Vector formats", link: "/features/vector-formats" }, + { text: "KNN queries", link: "/features/knn" }, + { text: "vec0 virtual vables", link: "/features/vec0" }, + { text: "Static blobs", link: "/features/static-blobs" }, + ], + }, guides, { text: "Documentation", @@ -172,10 +177,6 @@ function sidebar(): DefaultTheme.SidebarItem[] { { text: "API Reference", link: "/api-reference" }, ], }, - { - text: "Sponsors", - link: "/sponsors", - }, { text: "See also", items: [ diff --git a/site/api-reference.md b/site/api-reference.md index 9f7b5fe..bd144ea 100644 --- a/site/api-reference.md +++ b/site/api-reference.md @@ -639,7 +639,7 @@ Returns a version string of the current `sqlite-vec` installation. ```sql select vec_version(); --- 'v0.0.1-alpha.36' +-- 'v0.0.1-alpha.37' ``` @@ -651,9 +651,9 @@ Returns debugging information of the current `sqlite-vec` installation. 
```sql select vec_debug(); /* -'Version: v0.0.1-alpha.36 -Date: 2024-07-16T23:06:41Z-0700 -Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d +'Version: v0.0.1-alpha.37 +Date: 2024-07-23T14:09:43Z-0700 +Commit: 77f9b0374c8129056b344854de2dff6b103e5729 Build flags: avx ' */ diff --git a/site/build-ref.mjs b/site/build-ref.mjs index 32702d9..6d37512 100644 --- a/site/build-ref.mjs +++ b/site/build-ref.mjs @@ -1,7 +1,7 @@ import Database from "better-sqlite3"; import { load } from "js-yaml"; import { fileURLToPath } from "node:url"; -import { resolve, dirname } from "node:path"; +import { dirname, resolve } from "node:path"; import { readFileSync, writeFileSync } from "node:fs"; import * as v from "valibot"; import { table } from "table"; @@ -24,11 +24,11 @@ sqlite-vec is pre-v1, so expect breaking changes. const REF_PATH = resolve( dirname(fileURLToPath(import.meta.url)), - "../reference.yaml" + "../reference.yaml", ); const EXT_PATH = resolve( dirname(fileURLToPath(import.meta.url)), - "../dist/vec0" + "../dist/vec0", ); const DocSchema = v.objectWithRest( @@ -38,7 +38,7 @@ const DocSchema = v.objectWithRest( v.object({ title: v.string(), desc: v.string(), - }) + }), ), }, v.record( @@ -47,8 +47,8 @@ const DocSchema = v.objectWithRest( params: v.array(v.string()), desc: v.string(), example: v.union([v.string(), v.array(v.string())]), - }) - ) + }), + ), ); const tableConfig = { @@ -92,8 +92,9 @@ function formatSingleValue(value) { s += "'"; return `-- ${s}`; } - if (typeof value === "object" || Array.isArray(value)) + if (typeof value === "object" || Array.isArray(value)) { return "-- " + JSON.stringify(value, null, 2); + } } function formatValue(value) { if (typeof value === "string") return `'${value}'`; @@ -107,8 +108,9 @@ function formatValue(value) { s += "'"; return s; } - if (typeof value === "object" || Array.isArray(value)) + if (typeof value === "object" || Array.isArray(value)) { return JSON.stringify(value, null, 2); + } } function tableize(stmt, results) 
{ const columnNames = stmt.columns().map((c) => c.name); @@ -159,10 +161,9 @@ function renderExamples(db, name, example) { continue; } - const result = - results.length > 1 || stmt.columns().length > 1 - ? `/*\n${tableize(stmt, results)}\n*/\n` - : formatSingleValue(results[0][0]); + const result = results.length > 1 || stmt.columns().length > 1 + ? `/*\n${tableize(stmt, results)}\n*/\n` + : formatSingleValue(results[0][0]); md += result + "\n\n"; } @@ -182,9 +183,11 @@ for (const section in doc.sections) { md += doc.sections[section].desc; md += "\n\n"; - for (const [name, { params, desc, example }] of Object.entries( - doc[section] - )) { + for ( + const [name, { params, desc, example }] of Object.entries( + doc[section], + ) + ) { const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`; md += "### " + headerText + "\n\n"; diff --git a/site/compiling.md b/site/compiling.md index e69de29..59d02a0 100644 --- a/site/compiling.md +++ b/site/compiling.md @@ -0,0 +1,27 @@ +# Compiling `sqlite-vec` + +## From Source + +```bash +git clone https://github.com/asg017/sqlite-vec +cd sqlite-vec +make loadable +``` + +`dist/vec0` + + +## From the amalgamation build + +``` +https://github.com/asg017/sqlite-vec/releases/download/latest/TODO +``` + +## Compile-time options + + +SQLITE_VEC_ENABLE_AVX + +SQLITE_VEC_ENABLE_NEON + +SQLITE_VEC_OMIT_FS diff --git a/site/features/knn.md b/site/features/knn.md new file mode 100644 index 0000000..dcdbb31 --- /dev/null +++ b/site/features/knn.md @@ -0,0 +1,15 @@ +# KNN queries + +## `vec0` virtual tables + +## Manually with `vec_distance_l2()` + + +```sql +create table items( + contents text, + contents_embedding float[768] (check vec_f32(contents_embedding)) +); +``` + +## Static Blobs diff --git a/site/getting-started/installation.md b/site/getting-started/installation.md index 01a09dd..8730c6f 100644 --- a/site/getting-started/installation.md +++ b/site/getting-started/installation.md @@ -31,8 +31,11 @@ gem install 
sqlite-vec cargo add sqlite-vec ``` -```bash [Go] -go get -u github.com/asg017/sqlite-vec/bindings/go/cgo +```bash [Go (CGO)] +go get -u github.com/asg017/sqlite-vec-go-bindings/cgo +``` +```bash [Go (ncruces WASM)] +go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces ``` ```bash [Datasette] @@ -45,5 +48,31 @@ sqlite-utils install sqlite-utils-sqlite-vec ::: +## Pre-compiled extensions + Alternatively, you can download pre-compiled loadable extensions from the [`sqlite-vec` Github Releases](https://github.com/asg017/sqlite-vec/releases/latest). + +There's also an `install.sh` script that will automatically download the appropriate pre-compiled extension from Github Releases to your machine. + + +```sh +# yolo +curl -L https://github.com/asg017/sqlite-vec/releases/download/latest/install.sh | sh +``` + +```sh +# ok lets play it safe +curl -o install.sh -L https://github.com/asg017/sqlite-vec/releases/download/latest/install.sh +# inspect your scripts +cat install.sh +# TODO Test if execute permissions? +./install.sh +``` + + +## Compiling + +`sqlite-vec` is a single `sqlite-vec.c` and `sqlite-vec.h`, and can be easily compiled for different platforms, or statically linked into larger applications. + +See [*Compiling `sqlite-vec`*](#compiling) for more information. diff --git a/site/guides/matryoshka.md b/site/guides/matryoshka.md index db3cd5c..7e02add 100644 --- a/site/guides/matryoshka.md +++ b/site/guides/matryoshka.md @@ -2,17 +2,17 @@ Matryoshka embeddings are a new class of embedding models introduced in the TODO-YYY paper [_TODO title_](https://arxiv.org/abs/2205.13147). They allow one -to truncate excess dimensions in large vector, without lossing much quality. +to truncate excess dimensions in large vector, without sacrificing much quality. Let's say your embedding model generate 1024-dimensional vectors. If you have 1 million of these 1024-dimensional vectors, they would take up `4.096 GB` of -space! 
You're not able to reduce the dimensions without lossing a lot of +space! You're not able to reduce the dimensions without losing a lot of quality - if you were to remove half of the dimensions 512-dimensional vectors, you could expect to also lose 50% or more of the quality of results. There are -other dimensional-reduction techniques, like [PCA](#TODO), but this requires a -complicated and expensive training process. +other dimensional-reduction techniques, like [PCA](#TODO) or [Product Quantization](#TODO), but they typically require +complicated and expensive training processes. -Matryoshka embeddings, on the other hand, _can_ be truncated, without losing +Matryoshka embeddings, on the other hand, _can_ be truncated, without losing much quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim that @@ -20,16 +20,20 @@ They are called "Matryoshka" embeddings because ... TODO ## Matryoshka Embeddings with `sqlite-vec` -You can use a combination of [`vec_slice()`](/api-reference#vec_slice) and -[`vec_normalize()`](/api-reference#vec_slice) on Matryoshka embeddings to +You can use a combination of [`vec_slice()`](../api-reference.md#vec_slice) and +[`vec_normalize()`](../api-reference.md#vec_slice) on Matryoshka embeddings to truncate. ```sql select - vec_normalize(vec_slice(title_embeddings, 0, 256)) as title_embeddings_256d + vec_normalize( + vec_slice(title_embeddings, 0, 256) + ) as title_embeddings_256d from vec_articles; ``` +[`vec_slice()`](../api-reference.md#vec_slice) will cut down the vector to the first 256 dimensions. Then [`vec_normalize()`](../api-reference.md#vec_normalize) will normalize that truncated vector, which is typically a required step for Matryoshka embeddings. 
+ ## Benchmarks ## Suppported Models @@ -47,3 +51,7 @@ https://www.mixedbread.ai/blog/binary-mrl `mxbai-embed-large-v1`: 1024, 512, 256, 128, 64 `nomic-embed-text-v1.5`: 768, 512, 256, 128, 64 + +``` +# TODO new snowflake model +``` diff --git a/site/project.data.ts b/site/project.data.ts index 586bbed..6a1be97 100644 --- a/site/project.data.ts +++ b/site/project.data.ts @@ -1,12 +1,12 @@ import { readFileSync } from "node:fs"; -import { join, dirname } from "node:path"; +import { dirname, join } from "node:path"; import { fileURLToPath } from "node:url"; const PROJECT = "sqlite-vec"; const VERSION = readFileSync( join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"), - "utf8" + "utf8", ); export default { diff --git a/site/using/datasette.md b/site/using/datasette.md index 097562a..5e9d9f5 100644 --- a/site/using/datasette.md +++ b/site/using/datasette.md @@ -1,5 +1,7 @@ # Using `sqlite-vec` in Datasette +[![Datasette](https://img.shields.io/pypi/v/datasette-sqlite-vec.svg?color=B6B6D9&label=Datasette+plugin&logoColor=white&logo=python)](https://datasette.io/plugins/datasette-sqlite-vec) + ```bash datasette install datasette-sqlite-vec ``` diff --git a/site/using/go.md b/site/using/go.md index a5f504f..3f93c35 100644 --- a/site/using/go.md +++ b/site/using/go.md @@ -1,5 +1,7 @@ # Using `sqlite-vec` in Go +[![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-vec-go-bindings/cgo.svg)](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/cgo) [![Go Reference](https://pkg.go.dev/badge/github.com/asg017/sqlite-vec-go-bindings/ncruces.svg)](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/ncruces) + There are two ways you can embed `sqlite-vec` into Go applications: a CGO option for libraries like [`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3), or a @@ -8,14 +10,87 @@ WASM-based option with ## Option 1: CGO +If using [`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3) or another CGO-based 
SQLite library, then use the `github.com/asg017/sqlite-vec-go-bindings/cgo` module to embed `sqlite-vec` into your Go application. + ```bash -go get -u github.com/asg017/sqlite-vec/bindings/go/cgo +go get -u github.com/asg017/sqlite-vec-go-bindings/cgo +``` + +This will compile and statically link `sqlite-vec` into your project. The initial build will be slow, but later builds will be cached and much faster. + +Use `sqlite_vec.Auto()` to enable `sqlite-vec` functions in all future database connections. Also `sqlite_vec.Cancel()` is available to undo `Auto()`. + +```go +package main + +import ( + "database/sql" + "log" + + sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" + _ "github.com/mattn/go-sqlite3" +) + +func main() { + sqlite_vec.Auto() + db, err := sql.Open("sqlite3", ":memory:") + if err != nil { + log.Fatal(err) + } + defer db.Close() + + var vecVersion string + err = db.QueryRow("select vec_version()").Scan(&vecVersion) + if err != nil { + log.Fatal(err) + } + log.Printf("sqlite_version=%s, vec_version=%s\n",vecVersion) +} ``` ## Option 2: WASM based with `ncruces/go-sqlite3` -``` -go +[`github.com/ncruces/go-sqlite3`](https://github.com/ncruces/go-sqlite3) is an alternative SQLite Go driver that avoids CGO by using a custom WASM build of SQLite. To use `sqlite-vec` from this library, use the special WASM binary provided in `github.com/asg017/sqlite-vec-go-bindings/ncruces`. 
+ +```bash +go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces ``` +```go +package main + +import ( + _ "embed" + "log" + + _ "github.com/asg017/sqlite-vec-go-bindings/ncruces" + "github.com/ncruces/go-sqlite3" +) + +func main() { + db, err := sqlite3.Open(":memory:") + if err != nil { + log.Fatal(err) + } + + stmt, _, err := db.Prepare(`SELECT sqlite_version(), vec_version()`) + if err != nil { + log.Fatal(err) + } + + stmt.Step() + log.Printf("vec_version=%s\n", stmt.ColumnText(0)) + stmt.Close() +} +``` + +The `github.com/asg017/sqlite-vec-go-bindings/ncruces` package embeds a custom WASM build of SQLite, so there's no need to use `github.com/ncruces/go-sqlite3/embed`. + ## Working with vectors in Go + + +If vectors are provided as a list of floats, use `SerializeFloat32(list)` to serialize them into the compact BLOB format that `sqlite-vec` expects. + +```go +TODO +``` diff --git a/site/using/js.md b/site/using/js.md index 7b979c2..14f8eb3 100644 --- a/site/using/js.md +++ b/site/using/js.md @@ -56,41 +56,48 @@ accessor to bind as a parameter to `sqlite-vec` SQL functions. ```js // TODO const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]); -const stmt = db.prepare("INSERT INTO vss_demo VALUES (?)"); -stmt.run(embedding.buffer); +const stmt = db.prepare("select vec_length(?)"); +console.log(stmt.run(embedding.buffer)); ``` ## Node.js -Here's a quick recipe of using `sqlite-vec` with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) in Node.js. +Here's a quick recipe of using `sqlite-vec` with +[`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) in Node.js. ```js - ``` -See [`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs) +See +[`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs) for a more complete Node.js demo. ## Deno -Here's a quick recipe of using `sqlite-vec` with [`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. 
It will only work on Deno version `1.44` or greater, because of a bug in previous Deno version. +Here's a quick recipe of using `sqlite-vec` with +[`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. It will only work on Deno +version `1.44` or greater, because of a bug in previous Deno version. - Keep in mind, the `better-sqlite3` example above also works in Deno, you just need to prefix the `better-sqlite3` import with `npm:`, like `import * from "npm:better-sqlite3"`. +Keep in mind, the `better-sqlite3` example above also works in Deno, you just +need to prefix the `better-sqlite3` import with `npm:`, like +`import * from "npm:better-sqlite3"`. ```ts - ``` -See [`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts) +See +[`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts) for a more complete Deno demo. ## Bun -Here's a quick recipe of using `sqlite-vec` with [`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. The `better-sqlite3` example above also works with Bun. +Here's a quick recipe of using `sqlite-vec` with +[`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. The `better-sqlite3` +example above also works with Bun. ```ts - ``` -See [`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts) +See +[`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts) for a more complete Bun demo. diff --git a/site/using/python.md b/site/using/python.md index ab68418..aeac269 100644 --- a/site/using/python.md +++ b/site/using/python.md @@ -34,126 +34,77 @@ print(f"vec_version={vec_version}") ### Lists -If the vectors you are working with are provided as a list of floats, you can convert them into the compact BLOB format that `sqlite-vec` uses with [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack). 
+If your vectors in Python are provided as a list of floats, you can +convert them into the compact BLOB format that `sqlite-vec` uses with +`serialize_float32()`. This will internally call [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack). ```python -import struct - -def serialize(vector: List[float]) -> bytes: - """ serializes a list of floats into a compact "raw bytes" format """ - return struct.pack('%sf' % len(vector), *vector) - +from sqlite_vec import serialize_float32 embedding = [0.1, 0.2, 0.3, 0.4] -result = db.execute('select vec_length(?)', [serialize(embedding)]).fetchone()[0] +result = db.execute('select vec_length(?)', [serialize_float32(embedding)]) -print(result) # 4 +print(result.fetchone()[0]) # 4 ``` ### NumPy Arrays -If your vectors are from `numpy` arrays, the Python SQLite package allows you to pass it along as-is. Make sure you convert your array elements to 32-bit floats with [`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), as some embedding services will use `np.float64` elements. - +If your vectors are NumPy arrays, the Python SQLite package allows you to +pass it along as-is, since NumPy arrays implement [the Buffer protocol](https://docs.python.org/3/c-api/buffer.html). Make sure you cast your array elements to 32-bit floats +with +[`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), +as some embeddings will use `np.float64`. 
```python import numpy as np -import sqlite3 -import sqlite_vec - -db = sqlite3.connect(":memory:") -db.enable_load_extension(True) -sqlite_vec.load(db) -db.enable_load_extension(False) - -db.execute("CREATE VIRTUAL TABLE vec_demo(sample_embedding float[4])") - embedding = np.array([0.1, 0.2, 0.3, 0.4]) db.execute( - "INSERT INTO vec_demo(sample_embedding) VALUES (?)", [embedding.astype(np.float32)] -) + "SELECT vec_length(?)", [embedding.astype(np.float32)] +) # 4 ``` -## Recipes - -### OpenAI - -https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python - -TODO - -```python -from openai import OpenAI -import sqlite3 -import sqlite_vec - -texts = [ - - 'Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.', - 'Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.', - 'George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.', - 'Alan Mathison Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.', - 'Alaqua Cox is a Native American (Menominee) actress.' -] - -# change ':memory:' to a filepath to persist data -db = sqlite3.connect(':memory:') -db.enable_load_extension(True) -sqlite_vec.load(db) -db.enable_load_extension(False) - -client = OpenAI() - -response = client.embeddings.create( - input=[texts], - model="text-embedding-3-small" -) - -print(response.data[0].embedding) -``` - -### llamafile - -https://github.com/Mozilla-Ocho/llamafile - -TODO - -### llama-cpp-python - -https://github.com/abetlen/llama-cpp-python - -TODO - -### sentence-transformers (etc.) - -https://github.com/UKPLab/sentence-transformers - -TODO ## Using an up-to-date version of SQLite -Some features of `sqlite-vec` will require an up-to-date SQLite library. 
You can see what version of SQLite your Python environment uses with [`sqlite3.sqlite-version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), or with this one-line command: - +Some features of `sqlite-vec` will require an up-to-date SQLite library. You can +see what version of SQLite your Python environment uses with +[`sqlite3.sqlite_version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), +or with this one-line command: ```bash python -c 'import sqlite3; print(sqlite3.sqlite_version)' ``` -Currently, **SQLite version 3.41 or higher** is recommended but not required. `sqlite-vec` will work with older version, but certain features and queries will only work correctly in >=3.41. +Currently, **SQLite version 3.41 or higher** is recommended but not required. +`sqlite-vec` will work with older versions, but certain features and queries will +only work correctly in >=3.41. -To "upgrade" the SQLite version your Python installation uses, you have a few options. +To "upgrade" the SQLite version your Python installation uses, you have a few +options. ### Compile your own SQLite version -You can compile an up-to-date version of SQLite and use some system environment variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a different SQLite library. [This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) goes into this approach in more details. +You can compile an up-to-date version of SQLite and use some system environment +variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a +different SQLite library. +[This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) +goes into this approach in more details. -Although compiling SQLite can be straightforward, there are a lot of different compilation options to consider, which makes it confusing. This also doesn't work with Windows, which statically compiles its own SQLite library. 
+Although compiling SQLite can be straightforward, there are a lot of different +compilation options to consider, which makes it confusing. This also doesn't +work with Windows, which statically compiles its own SQLite library. ### Use `pysqlite3` -[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPi package that bundles an up-to-date SQLite library as a separate pip package. +[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPi package +that bundles an up-to-date SQLite library as a separate pip package. -While it's mostly compatible with the Python `sqlite3` module, there are a few rare edge cases where the APIs don't match. +While it's mostly compatible with the Python `sqlite3` module, there are a few +rare edge cases where the APIs don't match. ### Upgrading your Python version -Sometimes installing a latest version of Python will "magically" upgrade your SQLite version as well. This is a nuclear option, as upgrading Python installations can be quite the hassle, but most Python 3.12 builds will have a very recent SQLite version. +Sometimes installing a latest version of Python will "magically" upgrade your +SQLite version as well. This is a nuclear option, as upgrading Python +installations can be quite the hassle, but most Python 3.12 builds will have a +very recent SQLite version. diff --git a/site/using/ruby.md b/site/using/ruby.md index 9b1eb5d..349406b 100644 --- a/site/using/ruby.md +++ b/site/using/ruby.md @@ -1,9 +1,37 @@ # Using `sqlite-vec` in Ruby -https://rubygems.org/gems/sqlite-vec +![Gem](https://img.shields.io/gem/v/sqlite-vec?color=red&logo=rubygems&logoColor=white) + +Ruby developers can use `sqlite-vec` with the [`sqlite-vec` Gem](https://rubygems.org/gems/sqlite-vec). + ```bash gem install sqlite-vec ``` +You can then use `SqliteVec.load()` to load `sqlite-vec` SQL functions in a given SQLite connection. 
+ +```ruby +require 'sqlite3' +require 'sqlite_vec' + +db = SQLite3::Database.new(':memory:') +db.enable_load_extension(true) +SqliteVec.load(db) +db.enable_load_extension(false) + +result = db.execute('SELECT vec_version()') +puts result.first.first + +``` + + ## Working with vectors in Ruby + +If your embeddings are provided as a list of numbers, use `.pack("f*")` to convert them into the compact BLOB format that `sqlite-vec` uses. + +```ruby +embedding = [0.1, 0.2, 0.3, 0.4] +result = db.execute("SELECT vec_length(?)", [query.pack("f*")]]) +puts result.first.first # 4 +``` diff --git a/site/using/rust.md b/site/using/rust.md index 7b1e7ed..33c9de6 100644 --- a/site/using/rust.md +++ b/site/using/rust.md @@ -1,4 +1,5 @@ # Using `sqlite-vec` in Rust +[![Crates.io](https://img.shields.io/crates/v/sqlite-vec?logo=rust)](https://crates.io/crates/sqlite-vec) You can embed `sqlite-vec` into your Rust projects using the official [`sqlite-vec` crate](https://crates.io/crates/sqlite-vec). @@ -18,16 +19,29 @@ SQLite library's `sqlite3_auto_extension()` function. Here's an example with ```rs use sqlite_vec::sqlite3_vec_init; -use rusqlite::{ffi::sqlite3_auto_extension}; +use rusqlite::{ffi::sqlite3_auto_extension, Result}; -fn main() { +fn main()-> Result<()> { unsafe { sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ()))); } // future database connection will now automatically include sqlite-vec functions! + let db = Connection::open_in_memory()?; + let vec_version: String = db.query_row("select vec_version()", &[v.as_bytes()], |x| x.get(0)?)?; + + println!("vec_version={vec_version}"); + Ok(()) } ``` A full [`sqlite-vec` Rust demo](#TODO) is also available. ## Working with vectors in Rust + +If your vectors are provided as a `Vec` type, the [`zerocopy` crate](https://crates.io/crates/zerocopy) is recommended, specifically `zerocopy::AsBytes`. This will allow you to pass in vectors into `sqlite-vec` without any copying. 
+ +```rs +let query: Vec = vec![0.1, 0.2, 0.3, 0.4]; +let mut stmt = db.prepare("SELECT vec_length(?)")?; +stmt.execute(&[item.1.as_bytes()])?; +``` diff --git a/site/using/sqlite-utils.md b/site/using/sqlite-utils.md index ed4a74e..10b82cf 100644 --- a/site/using/sqlite-utils.md +++ b/site/using/sqlite-utils.md @@ -1,5 +1,7 @@ # Using `sqlite-vec` in `sqlite-utils` +![sqlite-utils](https://img.shields.io/pypi/v/sqlite-utils-sqlite-vec.svg?color=B6B6D9&label=sqlite-utils+plugin&logoColor=white&logo=python) + ```bash sqlite-utils install sqlite-utils-sqlite-vec ``` diff --git a/site/using/wasm.md b/site/using/wasm.md index e69de29..71a7504 100644 --- a/site/using/wasm.md +++ b/site/using/wasm.md @@ -0,0 +1,17 @@ +# `sqlite-vec` in the Browser with WebAssembly + +```html + + + + + +``` diff --git a/site/versioning.md b/site/versioning.md index 44d612b..5a691d9 100644 --- a/site/versioning.md +++ b/site/versioning.md @@ -1,10 +1,12 @@ # Semantic Versioning for `sqlite-vec` -`sqlite-vec` is pre-v1, so according to the rules of [Semantic Versioning](https://semver.org/), -so "minor" release like "0.2.0" or "0.3.0" may contain breaking changes. +`sqlite-vec` is pre-v1, so according to the rules of +[Semantic Versioning](https://semver.org/), a "minor" release like "0.2.0" or +"0.3.0" may contain breaking changes. -But what exactly counts as a "breaking change" in a SQLite extension? The line isn't so clear, unforetunately. -Here are a all the surfaces that COULD count as a "breaking change": +But what exactly counts as a "breaking change" in a SQLite extension? The line +isn't so clear, unfortunately. Here are all the surfaces that COULD count as +a "breaking change": - SQL functions and columns on virtual tables - The C API (extension entrypoints) @@ -13,7 +15,6 @@ Here are a all the surfaces that COULD count as a "breaking change": ## What counts as a "breaking change"? 
- ### Changes to SQL functions - Re-naming or removing an SQL function @@ -25,14 +26,13 @@ Here are a all the surfaces that COULD count as a "breaking change": ### Changes to the C API -Currently there is no "official" C API for `sqlite-vec`. However, there are entrypoints defined in C that C developers or developers using FFI can call. Any - +Currently there is no "official" C API for `sqlite-vec`. However, there are +entrypoints defined in C that C developers or developers using FFI can call. Any ### Compile-time options The removal of any compile time options - ## When is `v1.0` coming? In a few months! The main problems I want to solve before `v1.0` include: @@ -42,8 +42,13 @@ In a few months! The main problems I want to solve before `v1.0` include: - ANN indexing - Quantization + pre-transformations -Once those items are complete, I will likely create a `v1.0` release, along with renaming the `vec0` virtual table modile to `vec1`. And if future major releases are required, a `v2.0` major releases will be made with new `vec2` virtual tables and so on. +Once those items are complete, I will likely create a `v1.0` release, along with +renaming the `vec0` virtual table module to `vec1`. And if future major releases +are required, a `v2.0` major release will be made with new `vec2` virtual +tables and so on. -Ideally, only a `v1` major release would be required. But who knows what the future has in store with vector search! +Ideally, only a `v1` major release would be required. But who knows what the +future has in store with vector search! -In general, I will try my best to maximize stability and limit the number of breaking changes for future `sqlite-vec` versions. +In general, I will try my best to maximize stability and limit the number of +breaking changes for future `sqlite-vec` versions.