mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
docs
This commit is contained in:
parent
4febdff11a
commit
356f75cca7
17 changed files with 350 additions and 166 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import { defineConfig, DefaultTheme, HeadConfig } from "vitepress";
|
||||
import { DefaultTheme, defineConfig, HeadConfig } from "vitepress";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const PROJECT = "sqlite-vec";
|
||||
|
|
@ -105,8 +105,12 @@ function nav(): DefaultTheme.NavItem[] {
|
|||
link: `https://crates.io/crates/${PROJECT}`,
|
||||
},
|
||||
{
|
||||
text: "Golang: Go module",
|
||||
link: `https://pkg.go.dev/github.com/asg017/${PROJECT}/bindings/go/cgo`,
|
||||
text: "Golang: Go module (CGO)",
|
||||
link: `https://pkg.go.dev/github.com/asg017/${PROJECT}-go-bindings/cgo`,
|
||||
},
|
||||
{
|
||||
text: "Golang: Go module (WASM ncruces)",
|
||||
link: `https://pkg.go.dev/github.com/asg017/${PROJECT}-go-bindings/ncruces`,
|
||||
},
|
||||
{
|
||||
text: "Datasette: Plugin",
|
||||
|
|
@ -139,16 +143,7 @@ function sidebar(): DefaultTheme.SidebarItem[] {
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
text: "Features",
|
||||
collapsed: true,
|
||||
items: [
|
||||
{ text: "Vector formats", link: "/vector-formats" },
|
||||
{ text: "KNN queries", link: "/knn" },
|
||||
{ text: "vec0 virtual vables", link: "/vec0" },
|
||||
{ text: "Static blobs", link: "/numpy" },
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
text: "Using with...",
|
||||
collapsed: true,
|
||||
|
|
@ -159,11 +154,21 @@ function sidebar(): DefaultTheme.SidebarItem[] {
|
|||
{ text: "Rust", link: "/rust" },
|
||||
{ text: "Go", link: "/go" },
|
||||
{ text: "C/C++", link: "/c" },
|
||||
{ text: "WebAssembly (Browser)", link: "/wasm" },
|
||||
{ text: "Browser (WASM)", link: "/wasm" },
|
||||
{ text: "Datasette", link: "/datasette" },
|
||||
{ text: "sqlite-utils", link: "/sqlite-utils" },
|
||||
],
|
||||
},
|
||||
{
|
||||
text: "Features",
|
||||
collapsed: true,
|
||||
items: [
|
||||
{ text: "Vector formats", link: "/features/vector-formats" },
|
||||
{ text: "KNN queries", link: "/features/knn" },
|
||||
{ text: "vec0 virtual vables", link: "/features/vec0" },
|
||||
{ text: "Static blobs", link: "/features/static-blobs" },
|
||||
],
|
||||
},
|
||||
guides,
|
||||
{
|
||||
text: "Documentation",
|
||||
|
|
@ -172,10 +177,6 @@ function sidebar(): DefaultTheme.SidebarItem[] {
|
|||
{ text: "API Reference", link: "/api-reference" },
|
||||
],
|
||||
},
|
||||
{
|
||||
text: "Sponsors",
|
||||
link: "/sponsors",
|
||||
},
|
||||
{
|
||||
text: "See also",
|
||||
items: [
|
||||
|
|
|
|||
|
|
@ -639,7 +639,7 @@ Returns a version string of the current `sqlite-vec` installation.
|
|||
|
||||
```sql
|
||||
select vec_version();
|
||||
-- 'v0.0.1-alpha.36'
|
||||
-- 'v0.0.1-alpha.37'
|
||||
|
||||
|
||||
```
|
||||
|
|
@ -651,9 +651,9 @@ Returns debugging information of the current `sqlite-vec` installation.
|
|||
```sql
|
||||
select vec_debug();
|
||||
/*
|
||||
'Version: v0.0.1-alpha.36
|
||||
Date: 2024-07-16T23:06:41Z-0700
|
||||
Commit: e507bc0230de6dc44c7ff3b4895785edd734f31d
|
||||
'Version: v0.0.1-alpha.37
|
||||
Date: 2024-07-23T14:09:43Z-0700
|
||||
Commit: 77f9b0374c8129056b344854de2dff6b103e5729
|
||||
Build flags: avx '
|
||||
*/
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import Database from "better-sqlite3";
|
||||
import { load } from "js-yaml";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { resolve, dirname } from "node:path";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
import * as v from "valibot";
|
||||
import { table } from "table";
|
||||
|
|
@ -24,11 +24,11 @@ sqlite-vec is pre-v1, so expect breaking changes.
|
|||
|
||||
const REF_PATH = resolve(
|
||||
dirname(fileURLToPath(import.meta.url)),
|
||||
"../reference.yaml"
|
||||
"../reference.yaml",
|
||||
);
|
||||
const EXT_PATH = resolve(
|
||||
dirname(fileURLToPath(import.meta.url)),
|
||||
"../dist/vec0"
|
||||
"../dist/vec0",
|
||||
);
|
||||
|
||||
const DocSchema = v.objectWithRest(
|
||||
|
|
@ -38,7 +38,7 @@ const DocSchema = v.objectWithRest(
|
|||
v.object({
|
||||
title: v.string(),
|
||||
desc: v.string(),
|
||||
})
|
||||
}),
|
||||
),
|
||||
},
|
||||
v.record(
|
||||
|
|
@ -47,8 +47,8 @@ const DocSchema = v.objectWithRest(
|
|||
params: v.array(v.string()),
|
||||
desc: v.string(),
|
||||
example: v.union([v.string(), v.array(v.string())]),
|
||||
})
|
||||
)
|
||||
}),
|
||||
),
|
||||
);
|
||||
|
||||
const tableConfig = {
|
||||
|
|
@ -92,8 +92,9 @@ function formatSingleValue(value) {
|
|||
s += "'";
|
||||
return `-- ${s}`;
|
||||
}
|
||||
if (typeof value === "object" || Array.isArray(value))
|
||||
if (typeof value === "object" || Array.isArray(value)) {
|
||||
return "-- " + JSON.stringify(value, null, 2);
|
||||
}
|
||||
}
|
||||
function formatValue(value) {
|
||||
if (typeof value === "string") return `'${value}'`;
|
||||
|
|
@ -107,8 +108,9 @@ function formatValue(value) {
|
|||
s += "'";
|
||||
return s;
|
||||
}
|
||||
if (typeof value === "object" || Array.isArray(value))
|
||||
if (typeof value === "object" || Array.isArray(value)) {
|
||||
return JSON.stringify(value, null, 2);
|
||||
}
|
||||
}
|
||||
function tableize(stmt, results) {
|
||||
const columnNames = stmt.columns().map((c) => c.name);
|
||||
|
|
@ -159,10 +161,9 @@ function renderExamples(db, name, example) {
|
|||
continue;
|
||||
}
|
||||
|
||||
const result =
|
||||
results.length > 1 || stmt.columns().length > 1
|
||||
? `/*\n${tableize(stmt, results)}\n*/\n`
|
||||
: formatSingleValue(results[0][0]);
|
||||
const result = results.length > 1 || stmt.columns().length > 1
|
||||
? `/*\n${tableize(stmt, results)}\n*/\n`
|
||||
: formatSingleValue(results[0][0]);
|
||||
md += result + "\n\n";
|
||||
}
|
||||
|
||||
|
|
@ -182,9 +183,11 @@ for (const section in doc.sections) {
|
|||
md += doc.sections[section].desc;
|
||||
md += "\n\n";
|
||||
|
||||
for (const [name, { params, desc, example }] of Object.entries(
|
||||
doc[section]
|
||||
)) {
|
||||
for (
|
||||
const [name, { params, desc, example }] of Object.entries(
|
||||
doc[section],
|
||||
)
|
||||
) {
|
||||
const headerText = `\`${name}(${(params ?? []).join(", ")})\` {#${name}}`;
|
||||
|
||||
md += "### " + headerText + "\n\n";
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
# Compiling `sqlite-vec`
|
||||
|
||||
## From Source
|
||||
|
||||
```bash
|
||||
git clone https://github.com/asg017/sqlite-vec
|
||||
cd sqlite-vec
|
||||
make loadable
|
||||
```
|
||||
|
||||
`dist/vec0`
|
||||
|
||||
|
||||
## From the amalgamation build
|
||||
|
||||
```
|
||||
https://github.com/asg017/sqlite-vec/releases/download/latest/TODO
|
||||
```
|
||||
|
||||
## Compile-time options
|
||||
|
||||
|
||||
SQLITE_VEC_ENABLE_AVX
|
||||
|
||||
SQLITE_VEC_ENABLE_NEON
|
||||
|
||||
SQLITE_VEC_OMIT_FS
|
||||
15
site/features/knn.md
Normal file
15
site/features/knn.md
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# KNN queries
|
||||
|
||||
## `vec0` virtual tables
|
||||
|
||||
## Manually with `vec_distance_l2()`
|
||||
|
||||
|
||||
```sql
|
||||
create table items(
|
||||
contents text,
|
||||
contents_embedding float[768] (check vec_f32(contents_embedding))
|
||||
);
|
||||
```
|
||||
|
||||
## Static Blobs
|
||||
|
|
@ -31,8 +31,11 @@ gem install sqlite-vec
|
|||
cargo add sqlite-vec
|
||||
```
|
||||
|
||||
```bash [Go]
|
||||
go get -u github.com/asg017/sqlite-vec/bindings/go/cgo
|
||||
```bash [Go (CGO)]
|
||||
go get -u github.com/asg017/sqlite-vec-go-bindings/cgo
|
||||
```
|
||||
```bash [Go (ncruces WASM)]
|
||||
go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces
|
||||
```
|
||||
|
||||
```bash [Datasette]
|
||||
|
|
@ -45,5 +48,31 @@ sqlite-utils install sqlite-utils-sqlite-vec
|
|||
|
||||
:::
|
||||
|
||||
## Pre-compiled extensions
|
||||
|
||||
Alternatively, you can download pre-compiled loadable extensions from the
|
||||
[`sqlite-vec` Github Releases](https://github.com/asg017/sqlite-vec/releases/latest).
|
||||
|
||||
There's also an `install.sh` script that will automatically download the appropriate pre-compiled extension from Github Releases to your machine.
|
||||
|
||||
|
||||
```sh
|
||||
# yolo
|
||||
curl -L https://github.com/asg017/sqlite-vec/releases/download/latest/install.sh | sh
|
||||
```
|
||||
|
||||
```sh
|
||||
# ok lets play it safe
|
||||
curl -o install.sh -L https://github.com/asg017/sqlite-vec/releases/download/latest/install.sh
|
||||
# inspect your scripts
|
||||
cat install.sh
|
||||
# TODO Test if execute permissions?
|
||||
./install.sh
|
||||
```
|
||||
|
||||
|
||||
## Compiling
|
||||
|
||||
`sqlite-vec` is a single `sqlite-vec.c` and `sqlite-vec.h`, and can be easily compiled for different platforms, or statically linked into larger applications.
|
||||
|
||||
See [*Compiling `sqlite-vec`*](#compiling) for more information.
|
||||
|
|
|
|||
|
|
@ -2,17 +2,17 @@
|
|||
|
||||
Matryoshka embeddings are a new class of embedding models introduced in the
|
||||
TODO-YYY paper [_TODO title_](https://arxiv.org/abs/2205.13147). They allow one
|
||||
to truncate excess dimensions in large vector, without lossing much quality.
|
||||
to truncate excess dimensions in large vectors without sacrificing much quality.
|
||||
|
||||
Let's say your embedding model generates 1024-dimensional vectors. If you have 1
|
||||
million of these 1024-dimensional vectors, they would take up `4.096 GB` of
|
||||
space! You're not able to reduce the dimensions without lossing a lot of
|
||||
space! You're not able to reduce the dimensions without losing a lot of
|
||||
quality - if you were to remove half of the dimensions, leaving 512-dimensional vectors,
|
||||
you could expect to also lose 50% or more of the quality of results. There are
|
||||
other dimensional-reduction techniques, like [PCA](#TODO), but this requires a
|
||||
complicated and expensive training process.
|
||||
other dimensional-reduction techniques, like [PCA](#TODO) or [Product Quantization](#TODO), but they typically require
|
||||
complicated and expensive training processes.
|
||||
|
||||
Matryoshka embeddings, on the other hand, _can_ be truncated, without losing
|
||||
Matryoshka embeddings, on the other hand, _can_ be truncated, without losing much
|
||||
quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim
|
||||
that
|
||||
|
||||
|
|
@ -20,16 +20,20 @@ They are called "Matryoshka" embeddings because ... TODO
|
|||
|
||||
## Matryoshka Embeddings with `sqlite-vec`
|
||||
|
||||
You can use a combination of [`vec_slice()`](/api-reference#vec_slice) and
|
||||
[`vec_normalize()`](/api-reference#vec_slice) on Matryoshka embeddings to
|
||||
You can use a combination of [`vec_slice()`](../api-reference.md#vec_slice) and
|
||||
[`vec_normalize()`](../api-reference.md#vec_normalize) on Matryoshka embeddings to
|
||||
truncate.
|
||||
|
||||
```sql
|
||||
select
|
||||
vec_normalize(vec_slice(title_embeddings, 0, 256)) as title_embeddings_256d
|
||||
vec_normalize(
|
||||
vec_slice(title_embeddings, 0, 256)
|
||||
) as title_embeddings_256d
|
||||
from vec_articles;
|
||||
```
|
||||
|
||||
[`vec_slice()`](../api-reference.md#vec_slice) will cut down the vector to the first 256 dimensions. Then [`vec_normalize()`](../api-reference.md#vec_normalize) will normalize that truncated vector, which is typically a required step for Matryoshka embeddings.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
## Supported Models
|
||||
|
|
@ -47,3 +51,7 @@ https://www.mixedbread.ai/blog/binary-mrl
|
|||
`mxbai-embed-large-v1`: 1024, 512, 256, 128, 64
|
||||
|
||||
`nomic-embed-text-v1.5`: 768, 512, 256, 128, 64
|
||||
|
||||
```
|
||||
# TODO new snowflake model
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
import { readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const PROJECT = "sqlite-vec";
|
||||
|
||||
const VERSION = readFileSync(
|
||||
join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"),
|
||||
"utf8"
|
||||
"utf8",
|
||||
);
|
||||
|
||||
export default {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
# Using `sqlite-vec` in Datasette
|
||||
|
||||
[](https://datasette.io/plugins/datasette-sqlite-vec)
|
||||
|
||||
```bash
|
||||
datasette install datasette-sqlite-vec
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
# Using `sqlite-vec` in Go
|
||||
|
||||
[](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/cgo) [](https://pkg.go.dev/github.com/asg017/sqlite-vec-go-bindings/ncruces)
|
||||
|
||||
There are two ways you can embed `sqlite-vec` into Go applications: a CGO option
|
||||
for libraries like
|
||||
[`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3), or a
|
||||
|
|
@ -8,14 +10,87 @@ WASM-based option with
|
|||
|
||||
## Option 1: CGO
|
||||
|
||||
If using [`github.com/mattn/go-sqlite3`](https://github.com/mattn/go-sqlite3) or another CGO-based SQLite library, then use the `github.com/asg017/sqlite-vec-go-bindings/cgo` module to embed `sqlite-vec` into your Go application.
|
||||
|
||||
```bash
|
||||
go get -u github.com/asg017/sqlite-vec/bindings/go/cgo
|
||||
go get -u github.com/asg017/sqlite-vec-go-bindings/cgo
|
||||
```
|
||||
|
||||
This will compile and statically link `sqlite-vec` into your project. The initial build will be slow, but later builds will be cached and much faster.
|
||||
|
||||
Use `sqlite_vec.Auto()` to enable `sqlite-vec` functions in all future database connections. Also `sqlite_vec.Cancel()` is available to undo `Auto()`.
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"log"
|
||||
|
||||
sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func main() {
|
||||
sqlite_vec.Auto()
|
||||
db, err := sql.Open("sqlite3", ":memory:")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
var vecVersion string
|
||||
err = db.QueryRow("select vec_version()").Scan(&vecVersion)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
log.Printf("vec_version=%s\n", vecVersion)
|
||||
}
|
||||
```
|
||||
|
||||
## Option 2: WASM based with `ncruces/go-sqlite3`
|
||||
|
||||
```
|
||||
go
|
||||
[`github.com/ncruces/go-sqlite3`](https://github.com/ncruces/go-sqlite3) is an alternative SQLite Go driver that avoids CGO by using a custom WASM build of SQLite. To use `sqlite-vec` from this library, use the special WASM binary provided in `github.com/asg017/sqlite-vec-go-bindings/ncruces`.
|
||||
|
||||
```bash
|
||||
go get -u github.com/asg017/sqlite-vec-go-bindings/ncruces
|
||||
```
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"log"
|
||||
|
||||
_ "github.com/asg017/sqlite-vec-go-bindings/ncruces"
|
||||
"github.com/ncruces/go-sqlite3"
|
||||
)
|
||||
|
||||
func main() {
|
||||
db, err := sqlite3.Open(":memory:")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
stmt, _, err := db.Prepare(`SELECT sqlite_version(), vec_version()`)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
stmt.Step()
|
||||
log.Printf("vec_version=%s\n", stmt.ColumnText(1))
|
||||
stmt.Close()
|
||||
}
|
||||
```
|
||||
|
||||
The `github.com/asg017/sqlite-vec-go-bindings/ncruces` package embeds a custom WASM build of SQLite, so there's no need to use `github.com/ncruces/go-sqlite3/embed`.
|
||||
|
||||
## Working with vectors in Go
|
||||
|
||||
|
||||
If vectors are provided as a list of floats, use `SerializeFloat32(list)` to serialize them into the compact BLOB format that `sqlite-vec` expects.
|
||||
|
||||
```go
|
||||
TODO
|
||||
```
|
||||
|
|
|
|||
|
|
@ -56,41 +56,48 @@ accessor to bind as a parameter to `sqlite-vec` SQL functions.
|
|||
```js
|
||||
// TODO
|
||||
const embedding = new Float32Array([0.1, 0.2, 0.3, 0.4]);
|
||||
const stmt = db.prepare("INSERT INTO vss_demo VALUES (?)");
|
||||
stmt.run(embedding.buffer);
|
||||
const stmt = db.prepare("select vec_length(?)");
|
||||
console.log(stmt.run(embedding.buffer));
|
||||
```
|
||||
|
||||
## Node.js
|
||||
|
||||
Here's a quick recipe of using `sqlite-vec` with [`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) in Node.js.
|
||||
Here's a quick recipe of using `sqlite-vec` with
|
||||
[`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3) in Node.js.
|
||||
|
||||
```js
|
||||
|
||||
```
|
||||
|
||||
See [`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs)
|
||||
See
|
||||
[`simple-node/demo.mjs`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-node/demo.mjs)
|
||||
for a more complete Node.js demo.
|
||||
|
||||
## Deno
|
||||
|
||||
Here's a quick recipe of using `sqlite-vec` with [`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. It will only work on Deno version `1.44` or greater, because of a bug in previous Deno version.
|
||||
Here's a quick recipe of using `sqlite-vec` with
|
||||
[`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) in Deno. It will only work on Deno
|
||||
version `1.44` or greater, because of a bug in previous Deno versions.
|
||||
|
||||
Keep in mind, the `better-sqlite3` example above also works in Deno, you just need to prefix the `better-sqlite3` import with `npm:`, like `import * from "npm:better-sqlite3"`.
|
||||
Keep in mind, the `better-sqlite3` example above also works in Deno, you just
|
||||
need to prefix the `better-sqlite3` import with `npm:`, like
|
||||
`import Database from "npm:better-sqlite3"`.
|
||||
|
||||
```ts
|
||||
|
||||
```
|
||||
|
||||
See [`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts)
|
||||
See
|
||||
[`simple-deno/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-deno/demo.ts)
|
||||
for a more complete Deno demo.
|
||||
|
||||
## Bun
|
||||
|
||||
Here's a quick recipe of using `sqlite-vec` with [`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. The `better-sqlite3` example above also works with Bun.
|
||||
Here's a quick recipe of using `sqlite-vec` with
|
||||
[`bun:sqlite`](https://bun.sh/docs/api/sqlite) in Bun. The `better-sqlite3`
|
||||
example above also works with Bun.
|
||||
|
||||
```ts
|
||||
|
||||
```
|
||||
|
||||
See [`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts)
|
||||
See
|
||||
[`simple-bun/demo.ts`](https://github.com/asg017/sqlite-vec/blob/main/examples/simple-bun/demo.ts)
|
||||
for a more complete Bun demo.
|
||||
|
|
|
|||
|
|
@ -34,126 +34,77 @@ print(f"vec_version={vec_version}")
|
|||
|
||||
### Lists
|
||||
|
||||
If the vectors you are working with are provided as a list of floats, you can convert them into the compact BLOB format that `sqlite-vec` uses with [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack).
|
||||
If your vectors in Python are provided as a list of floats, you can
|
||||
convert them into the compact BLOB format that `sqlite-vec` uses with
|
||||
`serialize_float32()`. This will internally call [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack).
|
||||
|
||||
```python
|
||||
import struct
|
||||
|
||||
def serialize(vector: List[float]) -> bytes:
|
||||
""" serializes a list of floats into a compact "raw bytes" format """
|
||||
return struct.pack('%sf' % len(vector), *vector)
|
||||
|
||||
from sqlite_vec import serialize_float32
|
||||
|
||||
embedding = [0.1, 0.2, 0.3, 0.4]
|
||||
result = db.execute('select vec_length(?)', [serialize(embedding)]).fetchone()[0]
|
||||
result = db.execute('select vec_length(?)', [serialize_float32(embedding)])
|
||||
|
||||
print(result) # 4
|
||||
print(result.fetchone()[0]) # 4
|
||||
```
|
||||
|
||||
### NumPy Arrays
|
||||
|
||||
If your vectors are from `numpy` arrays, the Python SQLite package allows you to pass it along as-is. Make sure you convert your array elements to 32-bit floats with [`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), as some embedding services will use `np.float64` elements.
|
||||
|
||||
If your vectors are NumPy arrays, the Python SQLite package allows you to
|
||||
pass it along as-is, since NumPy arrays implement [the Buffer protocol](https://docs.python.org/3/c-api/buffer.html). Make sure you cast your array elements to 32-bit floats
|
||||
with
|
||||
[`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html),
|
||||
as some embeddings will use `np.float64`.
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
import sqlite3
|
||||
import sqlite_vec
|
||||
|
||||
db = sqlite3.connect(":memory:")
|
||||
db.enable_load_extension(True)
|
||||
sqlite_vec.load(db)
|
||||
db.enable_load_extension(False)
|
||||
|
||||
db.execute("CREATE VIRTUAL TABLE vec_demo(sample_embedding float[4])")
|
||||
|
||||
embedding = np.array([0.1, 0.2, 0.3, 0.4])
|
||||
db.execute(
|
||||
"INSERT INTO vec_demo(sample_embedding) VALUES (?)", [embedding.astype(np.float32)]
|
||||
)
|
||||
"SELECT vec_length(?)", [embedding.astype(np.float32)]
|
||||
) # 4
|
||||
```
|
||||
|
||||
## Recipes
|
||||
|
||||
### OpenAI
|
||||
|
||||
https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python
|
||||
|
||||
TODO
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
import sqlite3
|
||||
import sqlite_vec
|
||||
|
||||
texts = [
|
||||
|
||||
'Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.',
|
||||
'Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.',
|
||||
'George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.',
|
||||
'Alan Mathison Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.',
|
||||
'Alaqua Cox is a Native American (Menominee) actress.'
|
||||
]
|
||||
|
||||
# change ':memory:' to a filepath to persist data
|
||||
db = sqlite3.connect(':memory:')
|
||||
db.enable_load_extension(True)
|
||||
sqlite_vec.load(db)
|
||||
db.enable_load_extension(False)
|
||||
|
||||
client = OpenAI()
|
||||
|
||||
response = client.embeddings.create(
|
||||
input=[texts],
|
||||
model="text-embedding-3-small"
|
||||
)
|
||||
|
||||
print(response.data[0].embedding)
|
||||
```
|
||||
|
||||
### llamafile
|
||||
|
||||
https://github.com/Mozilla-Ocho/llamafile
|
||||
|
||||
TODO
|
||||
|
||||
### llama-cpp-python
|
||||
|
||||
https://github.com/abetlen/llama-cpp-python
|
||||
|
||||
TODO
|
||||
|
||||
### sentence-transformers (etc.)
|
||||
|
||||
https://github.com/UKPLab/sentence-transformers
|
||||
|
||||
TODO
|
||||
|
||||
## Using an up-to-date version of SQLite
|
||||
|
||||
Some features of `sqlite-vec` will require an up-to-date SQLite library. You can see what version of SQLite your Python environment uses with [`sqlite3.sqlite-version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), or with this one-line command:
|
||||
|
||||
Some features of `sqlite-vec` will require an up-to-date SQLite library. You can
|
||||
see what version of SQLite your Python environment uses with
|
||||
[`sqlite3.sqlite_version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version),
|
||||
or with this one-line command:
|
||||
|
||||
```bash
|
||||
python -c 'import sqlite3; print(sqlite3.sqlite_version)'
|
||||
```
|
||||
|
||||
Currently, **SQLite version 3.41 or higher** is recommended but not required. `sqlite-vec` will work with older version, but certain features and queries will only work correctly in >=3.41.
|
||||
Currently, **SQLite version 3.41 or higher** is recommended but not required.
|
||||
`sqlite-vec` will work with older versions, but certain features and queries will
|
||||
only work correctly in >=3.41.
|
||||
|
||||
To "upgrade" the SQLite version your Python installation uses, you have a few options.
|
||||
To "upgrade" the SQLite version your Python installation uses, you have a few
|
||||
options.
|
||||
|
||||
### Compile your own SQLite version
|
||||
|
||||
You can compile an up-to-date version of SQLite and use some system environment variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a different SQLite library. [This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) goes into this approach in more details.
|
||||
You can compile an up-to-date version of SQLite and use some system environment
|
||||
variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a
|
||||
different SQLite library.
|
||||
[This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python)
|
||||
goes into this approach in more detail.
|
||||
|
||||
Although compiling SQLite can be straightforward, there are a lot of different compilation options to consider, which makes it confusing. This also doesn't work with Windows, which statically compiles its own SQLite library.
|
||||
Although compiling SQLite can be straightforward, there are a lot of different
|
||||
compilation options to consider, which makes it confusing. This also doesn't
|
||||
work with Windows, which statically compiles its own SQLite library.
|
||||
|
||||
### Use `pysqlite3`
|
||||
|
||||
[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPi package that bundles an up-to-date SQLite library as a separate pip package.
|
||||
[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a 3rd party PyPi package
|
||||
that bundles an up-to-date SQLite library as a separate pip package.
|
||||
|
||||
While it's mostly compatible with the Python `sqlite3` module, there are a few rare edge cases where the APIs don't match.
|
||||
While it's mostly compatible with the Python `sqlite3` module, there are a few
|
||||
rare edge cases where the APIs don't match.
|
||||
|
||||
### Upgrading your Python version
|
||||
|
||||
Sometimes installing a latest version of Python will "magically" upgrade your SQLite version as well. This is a nuclear option, as upgrading Python installations can be quite the hassle, but most Python 3.12 builds will have a very recent SQLite version.
|
||||
Sometimes installing the latest version of Python will "magically" upgrade your
|
||||
SQLite version as well. This is a nuclear option, as upgrading Python
|
||||
installations can be quite the hassle, but most Python 3.12 builds will have a
|
||||
very recent SQLite version.
|
||||
|
|
|
|||
|
|
@ -1,9 +1,37 @@
|
|||
# Using `sqlite-vec` in Ruby
|
||||
|
||||
https://rubygems.org/gems/sqlite-vec
|
||||

|
||||
|
||||
Ruby developers can use `sqlite-vec` with the [`sqlite-vec` Gem](https://rubygems.org/gems/sqlite-vec).
|
||||
|
||||
|
||||
```bash
|
||||
gem install sqlite-vec
|
||||
```
|
||||
|
||||
You can then use `SqliteVec.load()` to load `sqlite-vec` SQL functions in a given SQLite connection.
|
||||
|
||||
```ruby
|
||||
require 'sqlite3'
|
||||
require 'sqlite_vec'
|
||||
|
||||
db = SQLite3::Database.new(':memory:')
|
||||
db.enable_load_extension(true)
|
||||
SqliteVec.load(db)
|
||||
db.enable_load_extension(false)
|
||||
|
||||
result = db.execute('SELECT vec_version()')
|
||||
puts result.first.first
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Working with vectors in Ruby
|
||||
|
||||
If your embeddings are provided as a list of numbers, use `.pack("f*")` to convert them into the compact BLOB format that `sqlite-vec` uses.
|
||||
|
||||
```ruby
|
||||
embedding = [0.1, 0.2, 0.3, 0.4]
|
||||
result = db.execute("SELECT vec_length(?)", [embedding.pack("f*")])
|
||||
puts result.first.first # 4
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
# Using `sqlite-vec` in Rust
|
||||
[](https://crates.io/crates/sqlite-vec)
|
||||
|
||||
You can embed `sqlite-vec` into your Rust projects using the official
|
||||
[`sqlite-vec` crate](https://crates.io/crates/sqlite-vec).
|
||||
|
|
@ -18,16 +19,29 @@ SQLite library's `sqlite3_auto_extension()` function. Here's an example with
|
|||
|
||||
```rs
|
||||
use sqlite_vec::sqlite3_vec_init;
|
||||
use rusqlite::{ffi::sqlite3_auto_extension};
|
||||
use rusqlite::{ffi::sqlite3_auto_extension, Connection, Result};
|
||||
|
||||
fn main() {
|
||||
fn main()-> Result<()> {
|
||||
unsafe {
|
||||
sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ())));
|
||||
}
|
||||
// future database connection will now automatically include sqlite-vec functions!
|
||||
let db = Connection::open_in_memory()?;
|
||||
let vec_version: String = db.query_row("select vec_version()", [], |x| x.get(0))?;
|
||||
|
||||
println!("vec_version={vec_version}");
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
A full [`sqlite-vec` Rust demo](#TODO) is also available.
|
||||
|
||||
## Working with vectors in Rust
|
||||
|
||||
If your vectors are provided as a `Vec<f32>` type, the [`zerocopy` crate](https://crates.io/crates/zerocopy) is recommended, specifically `zerocopy::AsBytes`. This will allow you to pass in vectors into `sqlite-vec` without any copying.
|
||||
|
||||
```rs
|
||||
let query: Vec<f32> = vec![0.1, 0.2, 0.3, 0.4];
|
||||
let mut stmt = db.prepare("SELECT vec_length(?)")?;
|
||||
stmt.execute(&[query.as_bytes()])?;
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
# Using `sqlite-vec` in `sqlite-utils`
|
||||
|
||||

|
||||
|
||||
```bash
|
||||
sqlite-utils install sqlite-utils-sqlite-vec
|
||||
```
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
# `sqlite-vec` in the Browser with WebAssembly
|
||||
|
||||
```html
|
||||
<html>
|
||||
<body>
|
||||
<script type="module">
|
||||
import {default as init} from "https://cdn.jsdelivr.net/npm/sqlite-vec-wasm-demo@latest/sqlite3.mjs";
|
||||
|
||||
const sqlite3 = await init();
|
||||
const db = new sqlite3.oo1.DB(":memory:");
|
||||
|
||||
const [sqlite_version, vec_version] = db.selectArray('select sqlite_version(), vec_version();')
|
||||
console.log(`vec_version=${vec_version}`);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
# Semantic Versioning for `sqlite-vec`
|
||||
|
||||
`sqlite-vec` is pre-v1, so according to the rules of [Semantic Versioning](https://semver.org/),
|
||||
so "minor" release like "0.2.0" or "0.3.0" may contain breaking changes.
|
||||
`sqlite-vec` is pre-v1, so according to the rules of
|
||||
[Semantic Versioning](https://semver.org/), "minor" releases like "0.2.0" or
|
||||
"0.3.0" may contain breaking changes.
|
||||
|
||||
But what exactly counts as a "breaking change" in a SQLite extension? The line isn't so clear, unforetunately.
|
||||
Here are a all the surfaces that COULD count as a "breaking change":
|
||||
But what exactly counts as a "breaking change" in a SQLite extension? The line
|
||||
isn't so clear, unfortunately. Here are all the surfaces that COULD count as
|
||||
a "breaking change":
|
||||
|
||||
- SQL functions and columns on virtual tables
|
||||
- The C API (extension entrypoints)
|
||||
|
|
@ -13,7 +15,6 @@ Here are a all the surfaces that COULD count as a "breaking change":
|
|||
|
||||
## What counts as a "breaking change"?
|
||||
|
||||
|
||||
### Changes to SQL functions
|
||||
|
||||
- Re-naming or removing an SQL function
|
||||
|
|
@ -25,14 +26,13 @@ Here are a all the surfaces that COULD count as a "breaking change":
|
|||
|
||||
### Changes to the C API
|
||||
|
||||
Currently there is no "official" C API for `sqlite-vec`. However, there are entrypoints defined in C that C developers or developers using FFI can call. Any
|
||||
|
||||
Currently there is no "official" C API for `sqlite-vec`. However, there are
|
||||
entrypoints defined in C that C developers or developers using FFI can call. Any
|
||||
|
||||
### Compile-time options
|
||||
|
||||
The removal of any compile time options
|
||||
|
||||
|
||||
## When is `v1.0` coming?
|
||||
|
||||
In a few months! The main problems I want to solve before `v1.0` include:
|
||||
|
|
@ -42,8 +42,13 @@ In a few months! The main problems I want to solve before `v1.0` include:
|
|||
- ANN indexing
|
||||
- Quantization + pre-transformations
|
||||
|
||||
Once those items are complete, I will likely create a `v1.0` release, along with renaming the `vec0` virtual table modile to `vec1`. And if future major releases are required, a `v2.0` major releases will be made with new `vec2` virtual tables and so on.
|
||||
Once those items are complete, I will likely create a `v1.0` release, along with
|
||||
renaming the `vec0` virtual table module to `vec1`. And if future major releases
|
||||
are required, a `v2.0` major release will be made with new `vec2` virtual
|
||||
tables and so on.
|
||||
|
||||
Ideally, only a `v1` major release would be required. But who knows what the future has in store with vector search!
|
||||
Ideally, only a `v1` major release would be required. But who knows what the
|
||||
future has in store with vector search!
|
||||
|
||||
In general, I will try my best to maximize stability and limit the number of breaking changes for future `sqlite-vec` versions.
|
||||
In general, I will try my best to maximize stability and limit the number of
|
||||
breaking changes for future `sqlite-vec` versions.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue