diff --git a/README.md b/README.md
index cc756ab..f8bc302 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,25 @@ See the Sponsors section for more details.
diff --git a/site/.vitepress/theme/index.ts b/site/.vitepress/theme/index.ts
index 54e08a8..82048fd 100644
--- a/site/.vitepress/theme/index.ts
+++ b/site/.vitepress/theme/index.ts
@@ -4,6 +4,7 @@ import type { Theme } from "vitepress";
import DefaultTheme from "vitepress/theme";
import "./style.css";
import Sponsors from "./Sponsors.vue";
+import HeroImg from "./HeroImg.vue";
export default {
extends: DefaultTheme,
@@ -14,7 +15,7 @@ export default {
h("marquee", { class: "banner", scrollamount: "10" }, [
"🚧🚧🚧 sqlite-vec is still in beta, and this documentation is incomplete! Watch the repo for updates 🚧🚧🚧",
]),
- "home-hero-image": () => h("div", {}, [""]),
+ //"home-hero-image": () => h(HeroImg),
"aside-ads-before": () => h(Sponsors),
});
},
diff --git a/site/.vitepress/theme/style.css b/site/.vitepress/theme/style.css
index 33a6310..df4b100 100644
--- a/site/.vitepress/theme/style.css
+++ b/site/.vitepress/theme/style.css
@@ -1,3 +1,21 @@
+/*@import "https://code.cdn.mozilla.net/fonts/zilla-slab.css";*/
+
+@font-face {
+ font-family: "ZillaSlab-SemiBold";
+ src: url("/fonts/ZillaSlab-SemiBold.woff");
+ src: url("/fonts/ZillaSlab-SemiBold.woff2") format("woff2"),
+ url("/fonts/ZillaSlab-SemiBold.woff") format("woff"),
+ url("/fonts/ZillaSlab-SemiBold.otf") format("opentype"),
+ url("/fonts/ZillaSlab-SemiBold.ttf") format("truetype");
+ font-weight: 600;
+ font-style: normal;
+}
+
+.VPHero h1,
+.VPNavBarTitle .title {
+ font-family: "ZillaSlab-SemiBold";
+ font-size: 1.5rem;
+}
/**
* Customize default theme styling by overriding CSS variables:
* https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css
@@ -68,6 +86,17 @@
--vp-c-danger-2: var(--vp-c-red-2);
--vp-c-danger-3: var(--vp-c-red-3);
--vp-c-danger-soft: var(--vp-c-red-soft);
+
+ --vp-c-brand-1x: #a6d189;
+ --vp-c-brand-1x: #a6da95;
+ --vp-c-brand-1x: #a6e3a1;
+}
+
+:root {
+ --vp-c-brand-1: #1e66f5;
+}
+.dark {
+ --vp-c-brand-1: #89b4fa;
}
/**
@@ -92,19 +121,20 @@
:root {
--vp-home-hero-name-color: transparent;
- --vp-home-hero-name-background: -webkit-linear-gradient(
- 120deg,
- #f5c2e7 30%,
- #94e2d5 /*#bd34fe 30%,
- #41d1ff*/
- );
+ --vp-home-hero-name-background: black;
+ /*
--vp-home-hero-image-background-image: linear-gradient(
-45deg,
#bd34fe 50%,
#47caff 50%
);
--vp-home-hero-image-filter: blur(44px);
+ */
+}
+
+.dark {
+ --vp-home-hero-name-background: white;
}
@media (min-width: 640px) {
diff --git a/site/getting-started/installation.md b/site/getting-started/installation.md
new file mode 100644
index 0000000..01a09dd
--- /dev/null
+++ b/site/getting-started/installation.md
@@ -0,0 +1,49 @@
+# Installing
+
+You have several options to include `sqlite-vec` in your projects, including
+PyPI packages for Python, NPM packages for Node.js, Gems for Ruby, and more.
+
+## With popular package managers
+
+::: code-group
+
+```bash [Python]
+pip install sqlite-vec
+```
+
+```bash [Node.js]
+npm install sqlite-vec
+```
+
+```bash [Bun]
+bun install sqlite-vec
+```
+
+```bash [Deno]
+deno add npm:sqlite-vec
+```
+
+```bash [Ruby]
+gem install sqlite-vec
+```
+
+```bash [Rust]
+cargo add sqlite-vec
+```
+
+```bash [Go]
+go get -u github.com/asg017/sqlite-vec/bindings/go/cgo
+```
+
+```bash [Datasette]
+datasette install datasette-sqlite-vec
+```
+
+```bash [sqlite-utils]
+sqlite-utils install sqlite-utils-sqlite-vec
+```
+
+:::
+
+Alternatively, you can download pre-compiled loadable extensions from the
+[`sqlite-vec` GitHub Releases](https://github.com/asg017/sqlite-vec/releases/latest).
diff --git a/site/getting-started/introduction.md b/site/getting-started/introduction.md
new file mode 100644
index 0000000..c827b37
--- /dev/null
+++ b/site/getting-started/introduction.md
@@ -0,0 +1,7 @@
+# Introduction to `sqlite-vec`
+
+## Intro to Vector Databases
+
+## Vector Search in SQLite with `sqlite-vec`
+
+## Getting help
diff --git a/site/getting-started.md b/site/getting-started/quickstart.md
similarity index 100%
rename from site/getting-started.md
rename to site/getting-started/quickstart.md
diff --git a/site/guides/arithmetic.md b/site/guides/arithmetic.md
new file mode 100644
index 0000000..807ac86
--- /dev/null
+++ b/site/guides/arithmetic.md
@@ -0,0 +1,5 @@
+# Vector Arithmetic
+
+- `vec_add()`
+- `vec_sub()`
+- `vec_mean()`
diff --git a/site/guides/binary-quant.md b/site/guides/binary-quant.md
new file mode 100644
index 0000000..cab8924
--- /dev/null
+++ b/site/guides/binary-quant.md
@@ -0,0 +1,120 @@
+# Binary Quantization
+
+"Quantization" refers to a variety of methods and techniques for reducing the
+size of vectors in a vector index. **Binary quantization** (BQ) refers to a
+specific technique where each individual floating point element in a vector is
+reduced to a single bit, typically by assigning `0` to negative numbers and `1`
+to positive numbers.
+
+For example, in this 8-dimensional `float32` vector:
+
+```json
+[-0.73, -0.80, 0.12, -0.73, 0.79, -0.11, 0.23, 0.97]
+```
+
+Applying binary quantization would result in the following `bit` vector:
+
+```json
+[0, 0, 1, 0, 1, 0, 1, 1]
+```
+
+The original 8-dimensional `float32` vector requires `8 * 4 = 32` bytes of space
+to store. For 1 million vectors, that would be `32MB`. On the other hand, the
+binary quantized 8-dimensional vector can be stored in a single byte — one bit
+per element. For 1 million vectors, that would be just `1MB`, a 32x reduction!
+
+Though keep in mind, you're bound to lose a lot of quality when reducing 32 bits
+of information to 1 bit. [Over-sampling and re-scoring](#re-scoring) will help a
+lot.
+
+The main goal of BQ is to dramatically reduce the size of your vector index,
+resulting in faster searches and fewer resources used. This is especially useful
+in `sqlite-vec`, which is (currently) brute-force only and meant to run on small
+devices. BQ is an easy, low-cost method to make larger vector datasets easier to
+manage.
+
+## Binary Quantization with `sqlite-vec`
+
+The `sqlite-vec` extension offers a `vec_quantize_binary()` SQL scalar function,
+which applies binary quantization to a `float32` or `int8` vector. For every
+element in a given vector, it will assign `0` to negative values and `1` to
+positive values, and pack them into a `BLOB`.
+
+```sqlite
+select vec_quantize_binary('[-0.73, -0.80, 0.12, -0.73, 0.79, -0.11, 0.23, 0.97]');
+-- X'd4'
+```
+
+The single byte `0xd4` in hexadecimal is `11010100` in binary.
+
+
+
+## Demo
+
+```sqlite
+create virtual table vec_movies using vec0(
+ synopsis_embedding bit[768]
+);
+```
+
+```sqlite
+insert into vec_movies(rowid, synopsis_embedding)
+ VALUES (:id, vec_quantize_binary(:vector));
+```
+
+```sqlite
+select
+ rowid,
+ distance
+from vec_movies
+where synopsis_embedding match vec_quantize_binary(:query)
+order by distance
+limit 20;
+```
+
+### Re-scoring
+
+```sqlite
+create virtual table vec_movies using vec0(
+ synopsis_embedding float[768],
+ synopsis_embedding_coarse bit[768]
+);
+```
+
+```sqlite
+insert into vec_movies(rowid, synopsis_embedding, synopsis_embedding_coarse)
+ VALUES (:id, :vector, vec_quantize_binary(:vector));
+```
+
+```sqlite
+with coarse_matches as (
+ select
+ rowid,
+ synopsis_embedding
+ from vec_movies
+ where synopsis_embedding_coarse match vec_quantize_binary(:query)
+ order by distance
+ limit 20 * 8
+)
+select
+ rowid,
+ vec_distance_L2(synopsis_embedding, :query)
+from coarse_matches
+order by 2
+limit 20;
+```
+
+# Benchmarks
+
+## Model support
+
+Certain embedding models, like [Nomic](https://nomic.ai/)'s
+[`nomic-embed-text-v1.5`](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5)
+text embedding model and
+[mixedbread.ai](https://www.mixedbread.ai/blog/mxbai-embed-2d-large-v1)'s
+[`mxbai-embed-large-v1`](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)
+are specifically trained to perform well after binary quantization.
+
+Other embedding models may not, but you can still try BQ and see if it works
+for your datasets. If your vectors are normalized (i.e. between `-1.0` and
+`1.0`), there's a good chance you will see acceptable results with BQ.
diff --git a/site/using/loadable.md b/site/guides/classifiers.md
similarity index 100%
rename from site/using/loadable.md
rename to site/guides/classifiers.md
diff --git a/site/guides/hybrid-search.md b/site/guides/hybrid-search.md
new file mode 100644
index 0000000..e69de29
diff --git a/site/guides/matryoshka.md b/site/guides/matryoshka.md
new file mode 100644
index 0000000..db3cd5c
--- /dev/null
+++ b/site/guides/matryoshka.md
@@ -0,0 +1,49 @@
+# Matryoshka (Adaptive-Length) Embeddings
+
+Matryoshka embeddings are a new class of embedding models introduced in the
+2022 paper [_Matryoshka Representation Learning_](https://arxiv.org/abs/2205.13147). They allow one
+to truncate excess dimensions in large vectors, without losing much quality.
+
+Let's say your embedding model generates 1024-dimensional vectors. If you have 1
+million of these 1024-dimensional vectors, they would take up `4.096 GB` of
+space! Normally, you can't reduce the dimensions without losing a lot of
+quality - if you were to remove half of the dimensions, leaving 512-dimensional
+vectors, you could expect to also lose 50% or more of the quality of results.
+There are other dimensionality-reduction techniques, like [PCA](#TODO), but these
+require a complicated and expensive training process.
+
+Matryoshka embeddings, on the other hand, _can_ be truncated, without losing
+quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim
+that
+
+They are called "Matryoshka" embeddings because ... TODO
+
+## Matryoshka Embeddings with `sqlite-vec`
+
+You can use a combination of [`vec_slice()`](/api-reference#vec_slice) and
+[`vec_normalize()`](/api-reference#vec_normalize) on Matryoshka embeddings to
+truncate them.
+
+```sql
+select
+ vec_normalize(vec_slice(title_embeddings, 0, 256)) as title_embeddings_256d
+from vec_articles;
+```
+
+## Benchmarks
+
+## Supported Models
+
+https://supabase.com/blog/matryoshka-embeddings#which-granularities-were-openais-text-embedding-3-models-trained-on
+
+`text-embedding-3-small`: 1536, 512 `text-embedding-3-large`: 3072, 1024, 256
+
+https://x.com/ZainHasan6/status/1757519325202686255
+
+`text-embedding-3-large`: 3072, 1536, 1024, 512
+
+https://www.mixedbread.ai/blog/binary-mrl
+
+`mxbai-embed-large-v1`: 1024, 512, 256, 128, 64
+
+`nomic-embed-text-v1.5`: 768, 512, 256, 128, 64
diff --git a/site/guides/performance.md b/site/guides/performance.md
new file mode 100644
index 0000000..2972957
--- /dev/null
+++ b/site/guides/performance.md
@@ -0,0 +1,4 @@
+- page_size
+- memory mapping
+- in-memory index
+- chunk_size (?)
diff --git a/site/guides/rag.md b/site/guides/rag.md
new file mode 100644
index 0000000..f860172
--- /dev/null
+++ b/site/guides/rag.md
@@ -0,0 +1,4 @@
+# Retrieval Augmented Generation (RAG)
+
+- "memories"?
+- chunking
diff --git a/site/guides/scalar-quant.md b/site/guides/scalar-quant.md
new file mode 100644
index 0000000..8738a0c
--- /dev/null
+++ b/site/guides/scalar-quant.md
@@ -0,0 +1,27 @@
+# Scalar Quantization (SQ)
+
+"Quantization" refers to a variety of methods and techniques for reducing the
+size of vectors in a vector index. **Scalar quantization** (SQ) refers to a
+specific technique where each individual floating point element in a vector is
+scaled to a smaller element type, like `float16` or `int8`.
+
+Most embedding models generate `float32` vectors. Each `float32` takes up 4
+bytes of space. This can add up, especially when working with a large number of
+vectors or vectors with many dimensions. However, if you scale them to `float16`
+or `int8` vectors, each element only takes up 2 bytes or 1 byte of space
+respectively, saving you precious space at the expense of some quality.
+
+```sql
+select vec_quantize_float16(vec_f32('[]'), 'unit');
+select vec_quantize_int8(vec_f32('[]'), 'unit');
+
+select vec_quantize('float16', vec_f32('...'));
+select vec_quantize('int8', vec_f32('...'));
+select vec_quantize('bit', vec_f32('...'));
+
+select vec_quantize('sqf16', vec_f32('...'));
+select vec_quantize('sqi8', vec_f32('...'));
+select vec_quantize('bq2', vec_f32('...'));
+```
+
+## Benchmarks
diff --git a/site/guides/semantic-search.md b/site/guides/semantic-search.md
new file mode 100644
index 0000000..e69de29
diff --git a/site/index.md b/site/index.md
index cf64396..c6c31ff 100644
--- a/site/index.md
+++ b/site/index.md
@@ -9,14 +9,14 @@ hero:
actions:
- theme: brand
text: Getting Started
- link: /getting-started
+ link: /getting-started/introduction
- theme: alt
text: API Reference
link: /api-reference
features:
- title: Runs everywhere
- details: On the server, in the browser with WASM, mobile devices, and more!
+ details: On laptops, servers, mobile devices, browsers with WASM, Raspberry Pis, and more!
- title: Bindings for many languages
details: Python, Ruby, Node.js/Deno/Bun, Go, Rust, and more!
- title: Only SQL
@@ -24,49 +24,24 @@ features:
---
```sqlite
+-- store 768-dimensional vectors in a vec0 virtual table
create virtual table vec_movies using vec0(
synopsis_embedding float[768]
);
+-- insert vectors into the table, as JSON or compact BLOBs
insert into vec_movies(rowid, synopsis_embedding)
select
rowid,
embed(synopsis) as synopsis_embedding
from movies;
-select rowid, distance
+-- KNN search!
+select
+ rowid,
+ distance
from vec_movies
where synopsis_embedding match embed('scary futuristic movies')
order by distance
limit 20;
```
-
----
-
-
sqlite
create virtual table vec_movies using vec0(
- synopsis_embedding float[768]
-);
-
-insert into vec_movies(rowid, synopsis_embedding)
- select
- rowid,
- embed(synopsis) as synopsis_embedding
- from movies;
-
-select rowid, distance
-from vec_movies
-where synopsis_embedding match embed('scary futuristic movies')
-order by distance
-limit 20;
-
-
-
-
diff --git a/site/project.data.ts b/site/project.data.ts
index 7403c13..586bbed 100644
--- a/site/project.data.ts
+++ b/site/project.data.ts
@@ -1,9 +1,9 @@
-import { defineConfig } from "vitepress";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
const PROJECT = "sqlite-vec";
+
const VERSION = readFileSync(
join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"),
"utf8"
diff --git a/site/public/fonts/ZillaSlab-SemiBold.otf b/site/public/fonts/ZillaSlab-SemiBold.otf
new file mode 100644
index 0000000..f6d1a2d
Binary files /dev/null and b/site/public/fonts/ZillaSlab-SemiBold.otf differ
diff --git a/site/public/fonts/ZillaSlab-SemiBold.ttf b/site/public/fonts/ZillaSlab-SemiBold.ttf
new file mode 100644
index 0000000..60d75e0
Binary files /dev/null and b/site/public/fonts/ZillaSlab-SemiBold.ttf differ
diff --git a/site/public/fonts/ZillaSlab-SemiBold.woff b/site/public/fonts/ZillaSlab-SemiBold.woff
new file mode 100644
index 0000000..8f925fb
Binary files /dev/null and b/site/public/fonts/ZillaSlab-SemiBold.woff differ
diff --git a/site/public/fonts/ZillaSlab-SemiBold.woff2 b/site/public/fonts/ZillaSlab-SemiBold.woff2
new file mode 100644
index 0000000..9665098
Binary files /dev/null and b/site/public/fonts/ZillaSlab-SemiBold.woff2 differ
diff --git a/site/public/logo.dark.svg b/site/public/logo.dark.svg
new file mode 100644
index 0000000..1dcd347
--- /dev/null
+++ b/site/public/logo.dark.svg
@@ -0,0 +1,17 @@
+
diff --git a/site/public/logo.light.svg b/site/public/logo.light.svg
new file mode 100644
index 0000000..6c138ee
--- /dev/null
+++ b/site/public/logo.light.svg
@@ -0,0 +1,17 @@
+
diff --git a/site/using/js.md b/site/using/js.md
index dcf91f0..a5fd6d5 100644
--- a/site/using/js.md
+++ b/site/using/js.md
@@ -1,13 +1,65 @@
# Using `sqlite-vec` in Node.js, Deno, and Bun
-```bash
+To use `sqlite-vec` in Node.js, Deno or Bun, install the
+[`sqlite-vec` NPM package](https://npmjs.com/package/sqlite-vec) using your
+favorite package manager:
+
+::: code-group
+
+```bash [npm]
npm install sqlite-vec
```
+```bash [Bun]
+bun install sqlite-vec
+```
+
+```bash [Deno]
+deno add npm:sqlite-vec
+```
+
+:::
+
+Once installed, use the `sqliteVec.load()` function to load `sqlite-vec` SQL
+functions into a SQLite connection.
+
+```js
+import * as sqliteVec from "sqlite-vec";
+import Database from "better-sqlite3";
+
+const db = new Database(":memory:");
+sqliteVec.load(db);
+
+const { vec_version } = db
+ .prepare("select vec_version() as vec_version;")
+ .get();
+
+console.log(`vec_version=${vec_version}`);
+```
+
+The `load()` function is compatible with
+[`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3),
+[`node-sqlite3`](https://github.com/TryGhost/node-sqlite3),
+[`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) (Deno), and
+[`bun:sqlite`](https://bun.sh/docs/api/sqlite).
+
## Working with vectors in JavaScript
+If your vectors are represented as an array of numbers, wrap them in a
+[Float32Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)
+and use the
+[`.buffer`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/buffer)
+accessor to insert the underlying ArrayBuffer.
+
+```js
+const embedding = new Float32Array([0.1, 0.2, 0.3]);
+const stmt = db.prepare("INSERT INTO vss_demo VALUES (?)");
+stmt.run(embedding.buffer);
+```
+
## Node.js
## Deno
## Bun
+
diff --git a/site/using/python.md b/site/using/python.md
index 76494a4..b5ad220 100644
--- a/site/using/python.md
+++ b/site/using/python.md
@@ -4,12 +4,21 @@ title: sqlite-vec in Python
# Using `sqlite-vec` in Python
+[](https://pypi.org/project/sqlite-vec/)
+
+To use `sqlite-vec` from Python, install the
+[`sqlite-vec` PyPI package](https://pypi.org/project/sqlite-vec/) using your
+favorite Python package manager:
+
```bash
pip install sqlite-vec
```
+Once installed, use the `sqlite_vec.load()` function to load `sqlite-vec` SQL
+functions into a SQLite connection.
+
```python
-import sqlite
+import sqlite3
import sqlite_vec
db = sqlite3.connect(":memory:")
@@ -19,13 +28,122 @@ db.enable_load_extension(False)
vec_version, = db.execute("select vec_version()").fetchone()
print(f"vec_version={vec_version}")
-
```
## Working with Vectors
-### Vectors as Lists
+### Lists
-### `numpy` Arrays
+If the vectors you are working with are provided as a list of floats, you can convert them into the compact BLOB format that `sqlite-vec` uses with [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack).
+
+```python
+import struct
+
+def serialize(vector: list[float]) -> bytes:
+ """ serializes a list of floats into a compact "raw bytes" format """
+ return struct.pack('%sf' % len(vector), *vector)
+
+
+embedding = [0.1, 0.2, 0.3, 0.4]
+result = db.execute('select vec_length(?)', [serialize(embedding)]).fetchone()[0]
+
+print(result) # 4
+```
+
+### NumPy Arrays
+
+If your vectors are from `numpy` arrays, the Python SQLite package allows you to pass it along as-is. Make sure you convert your array elements to 32-bit floats with [`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), as some embedding services will use `np.float64` elements.
+
+
+```python
+import numpy as np
+
+embedding = np.array([0.1, 0.2, 0.3, 0.4])
+result = db.execute('select vec_length(?)', [embedding.astype(np.float32)]).fetchone()[0]
+print(result) # 4
+```
+
+## Recipes
+
+### OpenAI
+
+https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python
+
+TODO
+
+```python
+from openai import OpenAI
+import sqlite3
+import sqlite_vec
+
+texts = [
+
+ 'Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.',
+ 'Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.',
+ 'George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.',
+ 'Alan Mathison Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.',
+ 'Alaqua Cox is a Native American (Menominee) actress.'
+]
+
+# change ':memory:' to a filepath to persist data
+db = sqlite3.connect(':memory:')
+db.enable_load_extension(True)
+sqlite_vec.load(db)
+db.enable_load_extension(False)
+
+client = OpenAI()
+
+response = client.embeddings.create(
+ input=texts,
+ model="text-embedding-3-small"
+)
+
+print(response.data[0].embedding)
+```
+
+### llamafile
+
+https://github.com/Mozilla-Ocho/llamafile
+
+TODO
+
+### llama-cpp-python
+
+https://github.com/abetlen/llama-cpp-python
+
+TODO
+
+### sentence-transformers (etc.)
+
+https://github.com/UKPLab/sentence-transformers
+
+TODO
## Using an up-to-date version of SQLite
+
+Some features of `sqlite-vec` will require an up-to-date SQLite library. You can see what version of SQLite your Python environment uses with [`sqlite3.sqlite_version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), or with this one-line command:
+
+
+```bash
+python -c 'import sqlite3; print(sqlite3.sqlite_version)'
+```
+
+Currently, **SQLite version 3.41 or higher** is recommended but not required. `sqlite-vec` will work with older versions, but certain features and queries will only work correctly in >=3.41.
+
+To "upgrade" the SQLite version your Python installation uses, you have a few options.
+
+### Compile your own SQLite version
+
+You can compile an up-to-date version of SQLite and use some system environment variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a different SQLite library. [This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) goes into this approach in more detail.
+
+Although compiling SQLite can be straightforward, there are a lot of different compilation options to consider, which makes it confusing. This also doesn't work with Windows, which statically compiles its own SQLite library.
+
+### Use `pysqlite3`
+
+[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a third-party PyPI package that bundles an up-to-date SQLite library as a separate pip package.
+
+While it's mostly compatible with the Python `sqlite3` module, there are a few rare edge cases where the APIs don't match.
+
+### Upgrading your Python version
+
+Sometimes installing the latest version of Python will "magically" upgrade your SQLite version as well. This is a nuclear option, as upgrading Python installations can be quite the hassle, but most Python 3.12 builds will have a very recent SQLite version.
diff --git a/site/using/ruby.md b/site/using/ruby.md
index 1fd06a8..9b1eb5d 100644
--- a/site/using/ruby.md
+++ b/site/using/ruby.md
@@ -1,5 +1,7 @@
# Using `sqlite-vec` in Ruby
+https://rubygems.org/gems/sqlite-vec
+
```bash
gem install sqlite-vec
```
diff --git a/tests/test-loadable.py b/tests/test-loadable.py
index 52f4c3d..2453b2c 100644
--- a/tests/test-loadable.py
+++ b/tests/test-loadable.py
@@ -479,13 +479,12 @@ def test_vec_quantize_i8():
).fetchone()[0]
assert vec_quantize_i8() == 111
-
@pytest.mark.skip(reason="TODO")
def test_vec_quantize_binary():
- vec_quantize_binary = lambda *args: db.execute(
- "select vec_quantize_binary()", args
+ vec_quantize_binary = lambda *args, input="?": db.execute(
+ f"select vec_quantize_binary({input})", args
).fetchone()[0]
- assert vec_quantize_binary() == 111
+ assert vec_quantize_binary("[-1, -1, -1, -1, 1, 1, 1, 1]") == 111
@pytest.mark.skip(reason="TODO")