mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
doc updates
This commit is contained in:
parent
df48ac2416
commit
b62f6f19a8
31 changed files with 751 additions and 97 deletions
19
README.md
19
README.md
|
|
@ -37,6 +37,25 @@ See <a href="#sponsors">the Sponsors section</a> for more details.
|
||||||
</i>
|
</i>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
## Installing
|
||||||
|
|
||||||
|
See [Installing `sqlite-vec`](https://alexgarcia.xyz/sqlite-vec/installing.html)
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
| Language | Install | More Info | |
|
||||||
|
| -------------- | ---------------------------------------------------- | ------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| Python | `pip install sqlite-vec` | [`sqlite-vec` with Python](https://alexgarcia.xyz/sqlite-vec/python.html) | [](https://pypi.org/project/sqlite-vec/) |
|
||||||
|
| Node.js | `npm install sqlite-vec` | [`sqlite-vec` with Node.js](https://alexgarcia.xyz/sqlite-vec/nodejs.html) | [](https://www.npmjs.com/package/sqlite-vec) |
|
||||||
|
| Ruby | `gem install sqlite-vec` | [`sqlite-vec` with Ruby](https://alexgarcia.xyz/sqlite-vec/ruby.html) |  |
|
||||||
|
| Go | `go get -u github.com/asg017/sqlite-vec/bindings/go` | [`sqlite-vec` with Go](https://alexgarcia.xyz/sqlite-vec/go.html) | [](https://pkg.go.dev/github.com/asg017/sqlite-vec/bindings/go) |
|
||||||
|
| Rust | `cargo add sqlite-vec` | [`sqlite-vec` with Rust](https://alexgarcia.xyz/sqlite-vec/rust.html) | [](https://crates.io/crates/sqlite-vec) |
|
||||||
|
| Datasette | `datasette install datasette-sqlite-vec` | [`sqlite-vec` with Datasette](https://alexgarcia.xyz/sqlite-vec/datasette.html) | [](https://datasette.io/plugins/datasette-sqlite-vec) |
|
||||||
|
| `sqlite-utils` | `sqlite-utils install sqlite-utils-sqlite-vec` | [`sqlite-vec` with sqlite-utils](https://alexgarcia.xyz/sqlite-vec/sqlite-utils.html) | [](https://datasette.io/plugins/datasette-sqlite-vec) |
|
||||||
|
| Github Release | | |  |
|
||||||
|
|
||||||
|
-->
|
||||||
|
|
||||||
## Sample usage
|
## Sample usage
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
|
|
|
||||||
90
examples/python-recipes/openai-sample.py
Normal file
90
examples/python-recipes/openai-sample.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
# pip install openai sqlite-vec
|
||||||
|
|
||||||
|
from openai import OpenAI
|
||||||
|
import sqlite3
|
||||||
|
import sqlite_vec
|
||||||
|
import struct
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
def serialize(vector: List[float]) -> bytes:
|
||||||
|
"""serializes a list of floats into a compact "raw bytes" format"""
|
||||||
|
return struct.pack("%sf" % len(vector), *vector)
|
||||||
|
|
||||||
|
|
||||||
|
sentences = [
|
||||||
|
"Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.",
|
||||||
|
"George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.",
|
||||||
|
"Alaqua Cox is a Native American (Menominee) actress.",
|
||||||
|
"Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.",
|
||||||
|
"Tamarindo, also commonly known as agua de tamarindo, is a non-alcoholic beverage made of tamarind, sugar, and water.",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
client = OpenAI()
|
||||||
|
|
||||||
|
# change ':memory:' to a filepath to persist data
|
||||||
|
db = sqlite3.connect(":memory:")
|
||||||
|
db.enable_load_extension(True)
|
||||||
|
sqlite_vec.load(db)
|
||||||
|
db.enable_load_extension(False)
|
||||||
|
|
||||||
|
db.execute(
|
||||||
|
"""
|
||||||
|
CREATE TABLE sentences(
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
sentence TEXT
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
with db:
|
||||||
|
for i, sentence in enumerate(sentences):
|
||||||
|
db.execute("INSERT INTO sentences(id, sentence) VALUES(?, ?)", [i, sentence])
|
||||||
|
|
||||||
|
db.execute(
|
||||||
|
"""
|
||||||
|
CREATE VIRTUAL TABLE vec_sentences USING vec0(
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
sentence_embedding FLOAT[1536]
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
with db:
|
||||||
|
sentence_rows = db.execute("SELECT id, sentence FROM sentences").fetchall()
|
||||||
|
response = client.embeddings.create(
|
||||||
|
input=[row[1] for row in sentence_rows], model="text-embedding-3-small"
|
||||||
|
)
|
||||||
|
for (id, _), embedding in zip(sentence_rows, response.data):
|
||||||
|
db.execute(
|
||||||
|
"INSERT INTO vec_sentences(id, sentence_embedding) VALUES(?, ?)",
|
||||||
|
[id, serialize(embedding.embedding)],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
query = "fruity liquids"
|
||||||
|
query_embedding = (
|
||||||
|
client.embeddings.create(input=query, model="text-embedding-3-small")
|
||||||
|
.data[0]
|
||||||
|
.embedding
|
||||||
|
)
|
||||||
|
|
||||||
|
results = db.execute(
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
vec_sentences.id,
|
||||||
|
distance,
|
||||||
|
sentence
|
||||||
|
FROM vec_sentences
|
||||||
|
LEFT JOIN sentences ON sentences.id = vec_sentences.id
|
||||||
|
WHERE sentence_embedding MATCH ?
|
||||||
|
AND k = 3
|
||||||
|
ORDER BY distance
|
||||||
|
""",
|
||||||
|
[serialize(query_embedding)],
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
for row in results:
|
||||||
|
print(row)
|
||||||
|
|
@ -11,6 +11,17 @@ const VERSION = readFileSync(
|
||||||
"utf8"
|
"utf8"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const sqliteLanuage = JSON.parse(
|
||||||
|
readFileSync(
|
||||||
|
join(
|
||||||
|
dirname(fileURLToPath(import.meta.url)),
|
||||||
|
"..",
|
||||||
|
"sqlite.tmlanguage.json"
|
||||||
|
),
|
||||||
|
"utf8"
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
function head(): HeadConfig[] {
|
function head(): HeadConfig[] {
|
||||||
return [
|
return [
|
||||||
[
|
[
|
||||||
|
|
@ -18,7 +29,7 @@ function head(): HeadConfig[] {
|
||||||
{
|
{
|
||||||
rel: "shortcut icon",
|
rel: "shortcut icon",
|
||||||
type: "image/svg+xml",
|
type: "image/svg+xml",
|
||||||
href: "favicon.svg",
|
href: "./logo.light.svg",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
|
|
@ -36,22 +47,34 @@ const guides = {
|
||||||
text: "Guides",
|
text: "Guides",
|
||||||
collapsed: true,
|
collapsed: true,
|
||||||
items: [
|
items: [
|
||||||
{ text: "Binary Quantization", link: "/guides/binary-quant" },
|
{ text: "Performance", link: "/guides/performance" },
|
||||||
{ text: "Scalar Quantization", link: "/guides/scalar-quant" },
|
|
||||||
{
|
{
|
||||||
text: "Matryosha/Adaptive Length Embeddings",
|
text: "Vector operations",
|
||||||
link: "/guides/matryoshka",
|
items: [
|
||||||
|
{ text: "Vector Arithmetic", link: "/guides/arithmetic" },
|
||||||
|
{ text: "Binary Quantization", link: "/guides/binary-quant" },
|
||||||
|
{ text: "Scalar Quantization", link: "/guides/scalar-quant" },
|
||||||
|
{
|
||||||
|
text: "Matryoshka Embeddings",
|
||||||
|
link: "/guides/matryoshka",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
text: "Build with sqlite-vec",
|
||||||
|
items: [
|
||||||
|
{ text: "Semantic Search", link: "/guides/semantic-search" },
|
||||||
|
{ text: "Hybrid Search", link: "/guides/hybrid-search" },
|
||||||
|
{ text: "Retrival Augmented Generation (RAG)", link: "/guides/rag" },
|
||||||
|
{ text: "Classifiers", link: "/guides/classifiers" },
|
||||||
|
],
|
||||||
},
|
},
|
||||||
{ text: "Semantic Search", link: "/guides/semantic-search" },
|
|
||||||
{ text: "Hybrid Search", link: "/guides/hybrid-search" },
|
|
||||||
{ text: "Classifiers", link: "/guides/classifiers" },
|
|
||||||
{ text: "Improving Performance", link: "/guides/improving-perf" },
|
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
function nav(): DefaultTheme.NavItem[] {
|
function nav(): DefaultTheme.NavItem[] {
|
||||||
return [
|
return [
|
||||||
guides,
|
|
||||||
{ text: "API Reference", link: "/api-reference" },
|
{ text: "API Reference", link: "/api-reference" },
|
||||||
{ text: "♥ Sponsor", link: "https://github.com/sponsors/asg017" },
|
{ text: "♥ Sponsor", link: "https://github.com/sponsors/asg017" },
|
||||||
{
|
{
|
||||||
|
|
@ -103,17 +126,25 @@ function sidebar(): DefaultTheme.SidebarItem[] {
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
text: "Getting Started",
|
text: "Getting Started",
|
||||||
collapsed: false,
|
collapsed: true,
|
||||||
items: [
|
items: [
|
||||||
{
|
{
|
||||||
text: "Quickstart",
|
text: "Installation",
|
||||||
link: "/getting-started",
|
link: "/installation",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
text: "Introduction",
|
||||||
|
link: "/introduction",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
text: "Quick Start",
|
||||||
|
link: "/quickstart",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
text: "Using with...",
|
text: "Using with...",
|
||||||
collapsed: false,
|
collapsed: true,
|
||||||
items: [
|
items: [
|
||||||
{ text: "Python", link: "/python" },
|
{ text: "Python", link: "/python" },
|
||||||
{ text: "JavaScript", link: "/js" },
|
{ text: "JavaScript", link: "/js" },
|
||||||
|
|
@ -124,7 +155,6 @@ function sidebar(): DefaultTheme.SidebarItem[] {
|
||||||
{ text: "WebAssembly (Browser)", link: "/wasm" },
|
{ text: "WebAssembly (Browser)", link: "/wasm" },
|
||||||
{ text: "Datasette", link: "/datasette" },
|
{ text: "Datasette", link: "/datasette" },
|
||||||
{ text: "sqlite-utils", link: "/sqlite-utils" },
|
{ text: "sqlite-utils", link: "/sqlite-utils" },
|
||||||
{ text: "Loadable Extension", link: "/loadable" },
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
guides,
|
guides,
|
||||||
|
|
@ -135,6 +165,10 @@ function sidebar(): DefaultTheme.SidebarItem[] {
|
||||||
{ text: "API Reference", link: "/api-reference" },
|
{ text: "API Reference", link: "/api-reference" },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
text: "Sponsors",
|
||||||
|
link: "/sponsors",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
text: "See also",
|
text: "See also",
|
||||||
items: [
|
items: [
|
||||||
|
|
@ -163,13 +197,18 @@ export default defineConfig({
|
||||||
head: head(),
|
head: head(),
|
||||||
base: "/sqlite-vec/",
|
base: "/sqlite-vec/",
|
||||||
themeConfig: {
|
themeConfig: {
|
||||||
|
logo: {
|
||||||
|
light: "/logo.dark.svg",
|
||||||
|
dark: "/logo.light.svg",
|
||||||
|
alt: "sqlite-vec logo",
|
||||||
|
},
|
||||||
|
|
||||||
nav: nav(),
|
nav: nav(),
|
||||||
|
|
||||||
sidebar: sidebar(),
|
sidebar: sidebar(),
|
||||||
|
|
||||||
footer: {
|
footer: {
|
||||||
message: "MIT License",
|
message: "MIT/Apache-2 License",
|
||||||
copyright: "Copyright © 2024 Alex Garcia",
|
copyright:
|
||||||
|
'Copyright © 2024 <a href="https://alexgarcia.xyz/">Alex Garcia</a>',
|
||||||
},
|
},
|
||||||
outline: "deep",
|
outline: "deep",
|
||||||
search: {
|
search: {
|
||||||
|
|
@ -185,20 +224,10 @@ export default defineConfig({
|
||||||
},
|
},
|
||||||
rewrites: {
|
rewrites: {
|
||||||
"using/:pkg.md": ":pkg.md",
|
"using/:pkg.md": ":pkg.md",
|
||||||
"guides/:pkg.md": ":pkg.md",
|
"getting-started/:pkg.md": ":pkg.md",
|
||||||
|
//"guides/:pkg.md": ":pkg.md",
|
||||||
},
|
},
|
||||||
markdown: {
|
markdown: {
|
||||||
languages: [
|
languages: [sqliteLanuage],
|
||||||
JSON.parse(
|
|
||||||
readFileSync(
|
|
||||||
join(
|
|
||||||
dirname(fileURLToPath(import.meta.url)),
|
|
||||||
"..",
|
|
||||||
"sqlite.tmlanguage.json"
|
|
||||||
),
|
|
||||||
"utf8"
|
|
||||||
)
|
|
||||||
),
|
|
||||||
],
|
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
|
||||||
38
site/.vitepress/theme/HeroImg.vue
Normal file
38
site/.vitepress/theme/HeroImg.vue
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
<script setup lang="ts"></script>
|
||||||
|
|
||||||
|
<template>
|
||||||
|
<div
|
||||||
|
style="
|
||||||
|
background: var(--vp-c-default-3);
|
||||||
|
padding: -4px 12px;
|
||||||
|
border-radius: 10px;
|
||||||
|
"
|
||||||
|
>
|
||||||
|
<div>
|
||||||
|
<div class="language-sqlite vp-adaptive-theme">
|
||||||
|
<pre
|
||||||
|
class="shiki shiki-themes github-light github-dark vp-code"
|
||||||
|
><code><span class="line"><span style="--shiki-light:#6A737D;--shiki-dark:#6A737D;">-- store 768-dimensional vectors in a vec0 virtual table</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">create</span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> virtual</span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> table</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">using</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec0(</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">float</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">[768]</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">);</span></span>
|
||||||
|
<span class="line"></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#6A737D;--shiki-dark:#6A737D;">-- insert vectors into the table, as JSON or compact BLOBs</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">insert into</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies(rowid, synopsis_embedding)</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> select</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> rowid,</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> embed(synopsis) </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">as</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> from</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> movies;</span></span>
|
||||||
|
<span class="line"></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#6A737D;--shiki-dark:#6A737D;">-- KNN search!</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">select</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> rowid,</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> distance</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">from</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">where</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">match</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> embed(</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF;">'scary futuristic movies'</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">)</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">order by</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> distance</span></span>
|
||||||
|
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">limit</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF;"> 20</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">;</span></span></code></pre>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
@ -11,7 +11,7 @@ const sponsors = computed(() => {
|
||||||
{
|
{
|
||||||
name: "Mozilla Builders",
|
name: "Mozilla Builders",
|
||||||
url: "",
|
url: "",
|
||||||
img: withBase("./mozilla.svg"),
|
img: withBase("/mozilla.svg"),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
@ -21,7 +21,7 @@ const sponsors = computed(() => {
|
||||||
{
|
{
|
||||||
name: "Fly.io",
|
name: "Fly.io",
|
||||||
url: "https://fly.io",
|
url: "https://fly.io",
|
||||||
img: withBase("./flyio.svg"),
|
img: withBase("/flyio.svg"),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
@ -31,7 +31,7 @@ const sponsors = computed(() => {
|
||||||
{
|
{
|
||||||
name: "Turso",
|
name: "Turso",
|
||||||
url: "https://turso.tech",
|
url: "https://turso.tech",
|
||||||
img: withBase("./turso.svg"),
|
img: withBase("/turso.svg"),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
@ -41,7 +41,7 @@ const sponsors = computed(() => {
|
||||||
{
|
{
|
||||||
name: "SQLite Cloud",
|
name: "SQLite Cloud",
|
||||||
url: "https://sqlitecloud.io",
|
url: "https://sqlitecloud.io",
|
||||||
img: withBase("./sqlitecloud.svg"),
|
img: withBase("/sqlitecloud.svg"),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
@ -51,18 +51,20 @@ const sponsors = computed(() => {
|
||||||
|
|
||||||
<template>
|
<template>
|
||||||
<!--<a class="sponsors-aside-text" href="/sponsor/">Sponsors</a>-->
|
<!--<a class="sponsors-aside-text" href="/sponsor/">Sponsors</a>-->
|
||||||
<VPDocAsideSponsors :data="sponsors" />
|
<div>
|
||||||
<div
|
<VPDocAsideSponsors :data="sponsors" />
|
||||||
style="
|
<div
|
||||||
font-size: 14px;
|
style="
|
||||||
text-align: center;
|
font-size: 14px;
|
||||||
font-style: italic;
|
text-align: center;
|
||||||
margin-top: 4px;
|
font-style: italic;
|
||||||
"
|
margin-top: 4px;
|
||||||
>
|
"
|
||||||
<a href="https://github.com/asg017/sqlite-vec#sponsors"
|
|
||||||
>Become a sponsor! ↗</a
|
|
||||||
>
|
>
|
||||||
|
<a href="https://github.com/asg017/sqlite-vec#sponsors"
|
||||||
|
>Become a sponsor! ↗</a
|
||||||
|
>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import type { Theme } from "vitepress";
|
||||||
import DefaultTheme from "vitepress/theme";
|
import DefaultTheme from "vitepress/theme";
|
||||||
import "./style.css";
|
import "./style.css";
|
||||||
import Sponsors from "./Sponsors.vue";
|
import Sponsors from "./Sponsors.vue";
|
||||||
|
import HeroImg from "./HeroImg.vue";
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
extends: DefaultTheme,
|
extends: DefaultTheme,
|
||||||
|
|
@ -14,7 +15,7 @@ export default {
|
||||||
h("marquee", { class: "banner", scrollamount: "10" }, [
|
h("marquee", { class: "banner", scrollamount: "10" }, [
|
||||||
"🚧🚧🚧 sqlite-vec is still in beta, and this documentation is incomplete! Watch the repo for updates 🚧🚧🚧",
|
"🚧🚧🚧 sqlite-vec is still in beta, and this documentation is incomplete! Watch the repo for updates 🚧🚧🚧",
|
||||||
]),
|
]),
|
||||||
"home-hero-image": () => h("div", {}, [""]),
|
//"home-hero-image": () => h(HeroImg),
|
||||||
"aside-ads-before": () => h(Sponsors),
|
"aside-ads-before": () => h(Sponsors),
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,21 @@
|
||||||
|
/*@import "https://code.cdn.mozilla.net/fonts/zilla-slab.css";*/
|
||||||
|
|
||||||
|
@font-face {
|
||||||
|
font-family: "ZillaSlab-SemiBold";
|
||||||
|
src: url("/fonts/ZillaSlab-SemiBold.woff");
|
||||||
|
src: url("/fonts/ZillaSlab-SemiBold.woff2") format("woff2"),
|
||||||
|
url("/fonts/ZillaSlab-SemiBold.woff") format("woff"),
|
||||||
|
url("/fonts/ZillaSlab(-SemiBold).otf") format("opentype"),
|
||||||
|
url("/fonts/ZillaSlab-SemiBold.ttf") format("truetype");
|
||||||
|
font-weight: 600;
|
||||||
|
font-style: normal;
|
||||||
|
}
|
||||||
|
|
||||||
|
.VPHero h1,
|
||||||
|
.VPNavBarTitle .title {
|
||||||
|
font-family: "ZillaSlab-SemiBold";
|
||||||
|
font-size: 1.5rem;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Customize default theme styling by overriding CSS variables:
|
* Customize default theme styling by overriding CSS variables:
|
||||||
* https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css
|
* https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css
|
||||||
|
|
@ -68,6 +86,17 @@
|
||||||
--vp-c-danger-2: var(--vp-c-red-2);
|
--vp-c-danger-2: var(--vp-c-red-2);
|
||||||
--vp-c-danger-3: var(--vp-c-red-3);
|
--vp-c-danger-3: var(--vp-c-red-3);
|
||||||
--vp-c-danger-soft: var(--vp-c-red-soft);
|
--vp-c-danger-soft: var(--vp-c-red-soft);
|
||||||
|
|
||||||
|
--vp-c-brand-1x: #a6d189;
|
||||||
|
--vp-c-brand-1x: #a6da95;
|
||||||
|
--vp-c-brand-1x: #a6e3a1;
|
||||||
|
}
|
||||||
|
|
||||||
|
:root {
|
||||||
|
--vp-c-brand-1: #1e66f5;
|
||||||
|
}
|
||||||
|
.dark {
|
||||||
|
--vp-c-brand-1: #89b4fa;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -92,19 +121,20 @@
|
||||||
|
|
||||||
:root {
|
:root {
|
||||||
--vp-home-hero-name-color: transparent;
|
--vp-home-hero-name-color: transparent;
|
||||||
--vp-home-hero-name-background: -webkit-linear-gradient(
|
--vp-home-hero-name-background: black;
|
||||||
120deg,
|
|
||||||
#f5c2e7 30%,
|
|
||||||
#94e2d5 /*#bd34fe 30%,
|
|
||||||
#41d1ff*/
|
|
||||||
);
|
|
||||||
|
|
||||||
|
/*
|
||||||
--vp-home-hero-image-background-image: linear-gradient(
|
--vp-home-hero-image-background-image: linear-gradient(
|
||||||
-45deg,
|
-45deg,
|
||||||
#bd34fe 50%,
|
#bd34fe 50%,
|
||||||
#47caff 50%
|
#47caff 50%
|
||||||
);
|
);
|
||||||
--vp-home-hero-image-filter: blur(44px);
|
--vp-home-hero-image-filter: blur(44px);
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
.dark {
|
||||||
|
--vp-home-hero-name-background: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
@media (min-width: 640px) {
|
@media (min-width: 640px) {
|
||||||
|
|
|
||||||
49
site/getting-started/installation.md
Normal file
49
site/getting-started/installation.md
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Installing
|
||||||
|
|
||||||
|
You have several options to include `sqlite-vec` in your projects, including
|
||||||
|
PyPI packages for Python, npm packages for Node.js, Gems for Ruby, and more.
|
||||||
|
|
||||||
|
## With popular package managers
|
||||||
|
|
||||||
|
::: code-group
|
||||||
|
|
||||||
|
```bash [Python]
|
||||||
|
pip install sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Node.js]
|
||||||
|
npm install sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Bun]
|
||||||
|
bun install sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Deno]
|
||||||
|
deno add npm:sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Ruby]
|
||||||
|
gem install sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Rust]
|
||||||
|
cargo add sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Go]
|
||||||
|
go get -u github.com/asg017/sqlite-vec/bindings/go/cgo
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Datasette]
|
||||||
|
datasette install datasette-sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [sqlite-utils]
|
||||||
|
sqlite-utils install sqlite-utils-sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
Alternatively, you can download pre-compiled loadable extensions from the
|
||||||
|
[`sqlite-vec` GitHub Releases](https://github.com/asg017/sqlite-vec/releases/latest).
|
||||||
7
site/getting-started/introduction.md
Normal file
7
site/getting-started/introduction.md
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Introduction to `sqlite-vec`
|
||||||
|
|
||||||
|
## Intro to Vector Databases
|
||||||
|
|
||||||
|
## Vector Search in SQLite with `sqlite-vec`
|
||||||
|
|
||||||
|
## Getting help
|
||||||
5
site/guides/arithmetic.md
Normal file
5
site/guides/arithmetic.md
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Vector Arithmetic
|
||||||
|
|
||||||
|
- `vec_add()`
|
||||||
|
- `vec_sub()`
|
||||||
|
- `vec_mean()`
|
||||||
120
site/guides/binary-quant.md
Normal file
120
site/guides/binary-quant.md
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
# Binary Quantization
|
||||||
|
|
||||||
|
"Quantization" refers to a variety of methods and techniques for reducing the
|
||||||
|
size of vectors in a vector index. **Binary quantization** (BQ) refers to a
|
||||||
|
specific technique where each individual floating point element in a vector is
|
||||||
|
reduced to a single bit, typically by assigning `0` to negative numbers and `1`
|
||||||
|
to positive numbers.
|
||||||
|
|
||||||
|
For example, in this 8-dimensional `float32` vector:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[-0.73, -0.80, 0.12, -0.73, 0.79, -0.11, 0.23, 0.97]
|
||||||
|
```
|
||||||
|
|
||||||
|
Applying binary quantization would result in the following `bit` vector:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[0, 0, 1, 0, 1, 0, 1, 1]
|
||||||
|
```
|
||||||
|
|
||||||
|
The original 8-dimensional `float32` vector requires `8 * 4 = 32` bytes of space
|
||||||
|
to store. For 1 million vectors, that would be `32MB`. On the other hand, the
|
||||||
|
binary quantized 8-dimensional vector can be stored in a single byte — one bit
|
||||||
|
per element. For 1 million vectors, that would be just `1MB`, a 32x reduction!
|
||||||
|
|
||||||
|
Though keep in mind, you're bound to lose a lot of quality when reducing 32 bits of
|
||||||
|
information to 1 bit. [Over-sampling and re-scoring](#re-scoring) will help a
|
||||||
|
lot.
|
||||||
|
|
||||||
|
The main goal of BQ is to dramatically reduce the size of your vector index,
|
||||||
|
resulting in faster searches and lower resource usage. This is especially useful in
|
||||||
|
`sqlite-vec`, which is (currently) brute-force only and meant to run on small
|
||||||
|
devices. BQ is an easy, low-cost method to make larger vector datasets easier to
|
||||||
|
manage.
|
||||||
|
|
||||||
|
## Binary Quantization with `sqlite-vec`
|
||||||
|
|
||||||
|
The `sqlite-vec` extension offers a `vec_quantize_binary()` SQL scalar function,
|
||||||
|
which applies binary quantization to a `float32` or `int8` vector. For every
|
||||||
|
element in a given vector, it will apply `0` to negative values and `1` to
|
||||||
|
positive values, and pack them into a `BLOB`.
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
select vec_quantize_binary('[-0.73, -0.80, 0.12, -0.73, 0.79, -0.11, 0.23, 0.97]');
|
||||||
|
-- X'd4'
|
||||||
|
```
|
||||||
|
|
||||||
|
The single byte `0xd4` in hexadecimal is `11010100` in binary.
|
||||||
|
|
||||||
|
<!-- TODO what https://github.com/asg017/sqlite-vec/issues/23 -->
|
||||||
|
|
||||||
|
## Demo
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
create virtual table vec_movies using vec0(
|
||||||
|
synopsis_embedding bit[768]
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
insert into vec_movies(rowid, synopsis_embedding)
|
||||||
|
VALUES (:id, vec_quantize_binary(:vector));
|
||||||
|
```
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
distance
|
||||||
|
from vec_movies
|
||||||
|
where synopsis_embedding match vec_quantize_binary(:query)
|
||||||
|
order by distance
|
||||||
|
limit 20;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Re-scoring
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
create virtual table vec_movies using vec0(
|
||||||
|
synopsis_embedding float[768],
|
||||||
|
synopsis_embedding_coarse bit[768]
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
insert into vec_movies(rowid, synopsis_embedding, synopsis_embedding_coarse)
|
||||||
|
VALUES (:id, :vector, vec_quantize_binary(:vector));
|
||||||
|
```
|
||||||
|
|
||||||
|
```sqlite
|
||||||
|
with coarse_matches as (
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
synopsis_embedding
|
||||||
|
from vec_movies
|
||||||
|
where synopsis_embedding_coarse match vec_quantize_binary(:query)
|
||||||
|
order by distance
|
||||||
|
limit 20 * 8
|
||||||
|
)
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
vec_distance_L2(synopsis_embedding, :query)
|
||||||
|
from coarse_matches
|
||||||
|
order by 2
|
||||||
|
limit 20;
|
||||||
|
```
|
||||||
|
|
||||||
|
# Benchmarks
|
||||||
|
|
||||||
|
## Model support
|
||||||
|
|
||||||
|
Certain embedding models, like [Nomic](https://nomic.ai/)'s
|
||||||
|
[`nomic-embed-text-v1.5`](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5)
|
||||||
|
text embedding model and
|
||||||
|
[mixedbread.ai](https://www.mixedbread.ai/blog/mxbai-embed-2d-large-v1)'s
|
||||||
|
[`mxbai-embed-large-v1`](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)
|
||||||
|
are specifically trained to perform well after binary quantization.
|
||||||
|
|
||||||
|
Other embedding models may not, but you can still try BQ and see if it works
|
||||||
|
for your datasets. If your vectors are normalized (i.e. between
|
||||||
|
`-1.0` and `1.0`) there's a good chance you will see acceptable results with BQ.
|
||||||
0
site/guides/hybrid-search.md
Normal file
0
site/guides/hybrid-search.md
Normal file
49
site/guides/matryoshka.md
Normal file
49
site/guides/matryoshka.md
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Matryoshka (Adaptive-Length) Embeddings
|
||||||
|
|
||||||
|
Matryoshka embeddings are a new class of embedding models introduced in the
|
||||||
|
2022 paper [_Matryoshka Representation Learning_](https://arxiv.org/abs/2205.13147). They allow one
|
||||||
|
to truncate excess dimensions in large vectors, without losing much quality.
|
||||||
|
|
||||||
|
Let's say your embedding model generates 1024-dimensional vectors. If you have 1
|
||||||
|
million of these 1024-dimensional vectors, they would take up `4.096 GB` of
|
||||||
|
space! You're not able to reduce the dimensions without losing a lot of
|
||||||
|
quality - if you were to remove half of the dimensions to get 512-dimensional vectors,
|
||||||
|
you could expect to also lose 50% or more of the quality of results. There are
|
||||||
|
other dimensional-reduction techniques, like [PCA](#TODO), but this requires a
|
||||||
|
complicated and expensive training process.
|
||||||
|
|
||||||
|
Matryoshka embeddings, on the other hand, _can_ be truncated, without losing
|
||||||
|
quality. Using [`mixedbread.ai`](#TODO) `mxbai-embed-large-v1` model, they claim
|
||||||
|
that
|
||||||
|
|
||||||
|
They are called "Matryoshka" embeddings because ... TODO
|
||||||
|
|
||||||
|
## Matryoshka Embeddings with `sqlite-vec`
|
||||||
|
|
||||||
|
You can use a combination of [`vec_slice()`](/api-reference#vec_slice) and
|
||||||
|
[`vec_normalize()`](/api-reference#vec_normalize) on Matryoshka embeddings to
|
||||||
|
truncate.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select
|
||||||
|
vec_normalize(vec_slice(title_embeddings, 0, 256)) as title_embeddings_256d
|
||||||
|
from vec_articles;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Benchmarks
|
||||||
|
|
||||||
|
## Supported Models
|
||||||
|
|
||||||
|
https://supabase.com/blog/matryoshka-embeddings#which-granularities-were-openais-text-embedding-3-models-trained-on
|
||||||
|
|
||||||
|
`text-embedding-3-small`: 1536, 512 `text-embedding-3-large`: 3072, 1024, 256
|
||||||
|
|
||||||
|
https://x.com/ZainHasan6/status/1757519325202686255
|
||||||
|
|
||||||
|
`text-embedding-3-large`: 3072, 1536, 1024, 512
|
||||||
|
|
||||||
|
https://www.mixedbread.ai/blog/binary-mrl
|
||||||
|
|
||||||
|
`mxbai-embed-large-v1`: 1024, 512, 256, 128, 64
|
||||||
|
|
||||||
|
`nomic-embed-text-v1.5`: 768, 512, 256, 128, 64
|
||||||
4
site/guides/performance.md
Normal file
4
site/guides/performance.md
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
- page_size
|
||||||
|
- memory mapping
|
||||||
|
- in-memory index
|
||||||
|
- chunk_size (?)
|
||||||
4
site/guides/rag.md
Normal file
4
site/guides/rag.md
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Retrieval Augmented Generation (RAG)
|
||||||
|
|
||||||
|
- "memories"?
|
||||||
|
- chunking
|
||||||
27
site/guides/scalar-quant.md
Normal file
27
site/guides/scalar-quant.md
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Scalar Quantization (SQ)
|
||||||
|
|
||||||
|
"Quantization" refers to a variety of methods and techniques for reducing the
|
||||||
|
size of vectors in a vector index. **Scalar quantization** (SQ) refers to a
|
||||||
|
specific technique where each individual floating point element in a vector is
|
||||||
|
scaled to a smaller element type, like `float16` or `int8`.
|
||||||
|
|
||||||
|
Most embedding models generate `float32` vectors. Each `float32` takes up 4
|
||||||
|
bytes of space. This can add up, especially when working with a large amount of
|
||||||
|
vectors or vectors with many dimensions. However, if you scale them to `float16`
|
||||||
|
or `int8` vectors, they only take up 2 bytes of space and 1 byte of space
|
||||||
|
respectively, saving you precious space at the expense of some quality.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
select vec_quantize_float16(vec_f32('[]'), 'unit');
|
||||||
|
select vec_quantize_int8(vec_f32('[]'), 'unit');
|
||||||
|
|
||||||
|
select vec_quantize('float16', vec_f32('...'));
|
||||||
|
select vec_quantize('int8', vec_f32('...'));
|
||||||
|
select vec_quantize('bit', vec_f32('...'));
|
||||||
|
|
||||||
|
select vec_quantize('sqf16', vec_f32('...'));
|
||||||
|
select vec_quantize('sqi8', vec_f32('...'));
|
||||||
|
select vec_quantize('bq2', vec_f32('...'));
|
||||||
|
```
|
||||||
|
|
||||||
|
## Benchmarks
|
||||||
0
site/guides/semantic-search.md
Normal file
0
site/guides/semantic-search.md
Normal file
|
|
@ -9,14 +9,14 @@ hero:
|
||||||
actions:
|
actions:
|
||||||
- theme: brand
|
- theme: brand
|
||||||
text: Getting Started
|
text: Getting Started
|
||||||
link: /getting-started
|
link: /introduction
|
||||||
- theme: alt
|
- theme: alt
|
||||||
text: API Reference
|
text: API Reference
|
||||||
link: /api-reference
|
link: /api-reference
|
||||||
|
|
||||||
features:
|
features:
|
||||||
- title: Runs everywhere
|
- title: Runs everywhere
|
||||||
details: On the server, in the browser with WASM, mobile devices, and more!
|
details: On laptops, servers, mobile devices, browsers with WASM, Raspberry Pis, and more!
|
||||||
- title: Bindings for many languages
|
- title: Bindings for many languages
|
||||||
details: Python, Ruby, Node.js/Deno/Bun, Go, Rust, and more!
|
details: Python, Ruby, Node.js/Deno/Bun, Go, Rust, and more!
|
||||||
- title: Only SQL
|
- title: Only SQL
|
||||||
|
|
@ -24,49 +24,24 @@ features:
|
||||||
---
|
---
|
||||||
|
|
||||||
```sqlite
|
```sqlite
|
||||||
|
-- store 768-dimensional vectors in a vec0 virtual table
|
||||||
create virtual table vec_movies using vec0(
|
create virtual table vec_movies using vec0(
|
||||||
synopsis_embedding float[768]
|
synopsis_embedding float[768]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- insert vectors into the table, as JSON or compact BLOBs
|
||||||
insert into vec_movies(rowid, synopsis_embedding)
|
insert into vec_movies(rowid, synopsis_embedding)
|
||||||
select
|
select
|
||||||
rowid,
|
rowid,
|
||||||
embed(synopsis) as synopsis_embedding
|
embed(synopsis) as synopsis_embedding
|
||||||
from movies;
|
from movies;
|
||||||
|
|
||||||
select rowid, distance
|
-- KNN search!
|
||||||
|
select
|
||||||
|
rowid,
|
||||||
|
distance
|
||||||
from vec_movies
|
from vec_movies
|
||||||
where synopsis_embedding match embed('scary futuristic movies')
|
where synopsis_embedding match embed('scary futuristic movies')
|
||||||
order by distance
|
order by distance
|
||||||
limit 20;
|
limit 20;
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
<div class="language-sqlite vp-adaptive-theme"><button title="Copy Code" class="copy"></button><span class="lang">sqlite</span><pre class="shiki shiki-themes github-light github-dark vp-code"><code><span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">create</span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> virtual</span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> table</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">using</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec0(</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding </span><span id="xxx"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">float</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">[768]</span></span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">);</span></span>
|
|
||||||
<span class="line"></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">insert into</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies(rowid, synopsis_embedding)</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> select</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> rowid,</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> embed(synopsis) </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">as</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> from</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> movies;</span></span>
|
|
||||||
<span class="line"></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">select</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> rowid, distance</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">from</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">where</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">match</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> embed(</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF;">'scary futuristic movies'</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">)</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">order by</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> distance</span></span>
|
|
||||||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">limit</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF;"> 20</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">;</span></span></code></pre></div>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
//document.querySelector('#xxx').style.background = 'red'
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<script setup>
|
|
||||||
import { onMounted } from 'vue'
|
|
||||||
|
|
||||||
onMounted(() => {
|
|
||||||
document.querySelector('#xxx').style.background = 'red';
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
import { defineConfig } from "vitepress";
|
|
||||||
import { readFileSync } from "node:fs";
|
import { readFileSync } from "node:fs";
|
||||||
import { join, dirname } from "node:path";
|
import { join, dirname } from "node:path";
|
||||||
import { fileURLToPath } from "node:url";
|
import { fileURLToPath } from "node:url";
|
||||||
|
|
||||||
const PROJECT = "sqlite-vec";
|
const PROJECT = "sqlite-vec";
|
||||||
|
|
||||||
const VERSION = readFileSync(
|
const VERSION = readFileSync(
|
||||||
join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"),
|
join(dirname(fileURLToPath(import.meta.url)), "..", "VERSION"),
|
||||||
"utf8"
|
"utf8"
|
||||||
|
|
|
||||||
BIN
site/public/fonts/ZillaSlab-SemiBold.otf
Normal file
BIN
site/public/fonts/ZillaSlab-SemiBold.otf
Normal file
Binary file not shown.
BIN
site/public/fonts/ZillaSlab-SemiBold.ttf
Normal file
BIN
site/public/fonts/ZillaSlab-SemiBold.ttf
Normal file
Binary file not shown.
BIN
site/public/fonts/ZillaSlab-SemiBold.woff
Normal file
BIN
site/public/fonts/ZillaSlab-SemiBold.woff
Normal file
Binary file not shown.
BIN
site/public/fonts/ZillaSlab-SemiBold.woff2
Normal file
BIN
site/public/fonts/ZillaSlab-SemiBold.woff2
Normal file
Binary file not shown.
17
site/public/logo.dark.svg
Normal file
17
site/public/logo.dark.svg
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
<svg width="256" height="256" viewBox="0 0 256 256" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="256" height="256" fill="#1E1E2E" rx="30"/>
|
||||||
|
<rect x="60.0452" y="174" width="132.873" height="41.2651" fill="#CDD6F4"/>
|
||||||
|
<path d="M80.2651 194.633C80.2651 206.028 71.0276 215.265 59.6325 215.265C48.2375 215.265 39 206.028 39 194.633C39 183.237 48.2375 174 59.6325 174C71.0276 174 80.2651 183.237 80.2651 194.633Z" fill="#CDD6F4"/>
|
||||||
|
<path d="M214.789 194.633C214.789 206.028 205.552 215.265 194.157 215.265C182.762 215.265 173.524 206.028 173.524 194.633C173.524 183.237 182.762 174 194.157 174C205.552 174 214.789 183.237 214.789 194.633Z" fill="#CDD6F4"/>
|
||||||
|
<rect x="61.0452" y="107.867" width="45.3916" height="41.2651" fill="#89DCEB"/>
|
||||||
|
<rect x="151.828" y="107.867" width="45.3916" height="41.2651" fill="#89DCEB"/>
|
||||||
|
<circle cx="61.0452" cy="128.5" r="20.6325" fill="#89DCEB"/>
|
||||||
|
<circle cx="61.0452" cy="128.5" r="20.6325" fill="#89DCEB"/>
|
||||||
|
<circle cx="105.611" cy="128.5" r="20.6325" fill="#89DCEB"/>
|
||||||
|
<circle cx="150.178" cy="128.5" r="20.6325" fill="#89DCEB"/>
|
||||||
|
<circle cx="194.744" cy="128.5" r="20.6325" fill="#89DCEB"/>
|
||||||
|
<circle cx="61.0452" cy="62.6325" r="20.6325" fill="#CBA6F7"/>
|
||||||
|
<circle cx="105.611" cy="62.6325" r="20.6325" fill="#CBA6F7"/>
|
||||||
|
<circle cx="150.178" cy="62.6325" r="20.6325" fill="#CBA6F7"/>
|
||||||
|
<circle cx="194.744" cy="62.6325" r="20.6325" fill="#CBA6F7"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.4 KiB |
17
site/public/logo.light.svg
Normal file
17
site/public/logo.light.svg
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
<svg width="256" height="256" viewBox="0 0 256 256" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="256" height="256" fill="#EFF1F5" rx="30"/>
|
||||||
|
<rect x="60.0452" y="174" width="132.873" height="41.2651" fill="#4C4F69"/>
|
||||||
|
<path d="M80.2651 194.633C80.2651 206.028 71.0276 215.265 59.6325 215.265C48.2375 215.265 39 206.028 39 194.633C39 183.237 48.2375 174 59.6325 174C71.0276 174 80.2651 183.237 80.2651 194.633Z" fill="#4C4F69"/>
|
||||||
|
<path d="M214.789 194.633C214.789 206.028 205.552 215.265 194.157 215.265C182.762 215.265 173.524 206.028 173.524 194.633C173.524 183.237 182.762 174 194.157 174C205.552 174 214.789 183.237 214.789 194.633Z" fill="#4C4F69"/>
|
||||||
|
<rect x="61.0452" y="107.867" width="45.3916" height="41.2651" fill="#04A5E5"/>
|
||||||
|
<rect x="151.828" y="107.867" width="45.3916" height="41.2651" fill="#04A5E5"/>
|
||||||
|
<circle cx="61.0452" cy="128.5" r="20.6325" fill="#04A5E5"/>
|
||||||
|
<circle cx="61.0452" cy="128.5" r="20.6325" fill="#04A5E5"/>
|
||||||
|
<circle cx="105.611" cy="128.5" r="20.6325" fill="#04A5E5"/>
|
||||||
|
<circle cx="150.178" cy="128.5" r="20.6325" fill="#04A5E5"/>
|
||||||
|
<circle cx="194.744" cy="128.5" r="20.6325" fill="#04A5E5"/>
|
||||||
|
<circle cx="61.0452" cy="62.6325" r="20.6325" fill="#8839EF"/>
|
||||||
|
<circle cx="105.611" cy="62.6325" r="20.6325" fill="#8839EF"/>
|
||||||
|
<circle cx="150.178" cy="62.6325" r="20.6325" fill="#8839EF"/>
|
||||||
|
<circle cx="194.744" cy="62.6325" r="20.6325" fill="#8839EF"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.4 KiB |
|
|
@ -1,13 +1,65 @@
|
||||||
# Using `sqlite-vec` in Node.js, Deno, and Bun
|
# Using `sqlite-vec` in Node.js, Deno, and Bun
|
||||||
|
|
||||||
```bash
|
To use `sqlite-vec` in Node.js, Deno or Bun, install the
|
||||||
|
[`sqlite-vec` NPM package](https://npmjs.com/package/sqlite-vec) using your
|
||||||
|
favorite package manager:
|
||||||
|
|
||||||
|
::: code-group
|
||||||
|
|
||||||
|
```bash [npm]
|
||||||
npm install sqlite-vec
|
npm install sqlite-vec
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```bash [Bun]
|
||||||
|
bun install sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash [Deno]
|
||||||
|
deno add npm:sqlite-vec
|
||||||
|
```
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
Once installed, use the `sqliteVec.load()` function to load `sqlite-vec` SQL
|
||||||
|
functions into a SQLite connection.
|
||||||
|
|
||||||
|
```js
|
||||||
|
import * as sqliteVec from "sqlite-vec";
|
||||||
|
import Database from "better-sqlite3";
|
||||||
|
|
||||||
|
const db = new Database(":memory:");
|
||||||
|
sqliteVec.load(db);
|
||||||
|
|
||||||
|
const { vec_version } = db
|
||||||
|
.prepare("select vec_version() as vec_version;")
|
||||||
|
.get();
|
||||||
|
|
||||||
|
console.log(`vec_version=${vec_version}`);
|
||||||
|
```
|
||||||
|
|
||||||
|
The `load()` function is compatible with
|
||||||
|
[`better-sqlite3`](https://github.com/WiseLibs/better-sqlite3),
|
||||||
|
[`node-sqlite3`](https://github.com/TryGhost/node-sqlite3),
|
||||||
|
[`jsr:@db/sqlite`](https://jsr.io/@db/sqlite) (Deno), and
|
||||||
|
[`bun:sqlite`](https://bun.sh/docs/api/sqlite).
|
||||||
|
|
||||||
## Working with vectors in JavaScript
|
## Working with vectors in JavaScript
|
||||||
|
|
||||||
|
If your vectors are represented as an array of numbers, wrap them in a
|
||||||
|
[Float32Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)
|
||||||
|
and use the
|
||||||
|
[`.buffer`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/buffer)
|
||||||
|
accessor to insert the underlying ArrayBuffer.
|
||||||
|
|
||||||
|
```js
|
||||||
|
const embedding = new Float32Array([0.1, 0.2, 0.3]);
|
||||||
|
const stmt = db.prepare("INSERT INTO vss_demo VALUES (?)");
|
||||||
|
stmt.run(embedding.buffer);
|
||||||
|
|
||||||
|
|
||||||
## Node.js
|
## Node.js
|
||||||
|
|
||||||
## Deno
|
## Deno
|
||||||
|
|
||||||
## Bun
|
## Bun
|
||||||
|
```
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,21 @@ title: sqlite-vec in Python
|
||||||
|
|
||||||
# Using `sqlite-vec` in Python
|
# Using `sqlite-vec` in Python
|
||||||
|
|
||||||
|
[](https://pypi.org/project/sqlite-vec/)
|
||||||
|
|
||||||
|
To use `sqlite-vec` from Python, install the
|
||||||
|
[`sqlite-vec` PyPI package](https://pypi.org/project/sqlite-vec/) using your
|
||||||
|
favorite Python package manager:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install sqlite-vec
|
pip install sqlite-vec
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Once installed, use the `sqlite_vec.load()` function to load `sqlite-vec` SQL
|
||||||
|
functions into a SQLite connection.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import sqlite
|
import sqlite3
|
||||||
import sqlite_vec
|
import sqlite_vec
|
||||||
|
|
||||||
db = sqlite3.connect(":memory:")
|
db = sqlite3.connect(":memory:")
|
||||||
|
|
@ -19,13 +28,122 @@ db.enable_load_extension(False)
|
||||||
|
|
||||||
vec_version, = db.execute("select vec_version()").fetchone()
|
vec_version, = db.execute("select vec_version()").fetchone()
|
||||||
print(f"vec_version={vec_version}")
|
print(f"vec_version={vec_version}")
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Working with Vectors
|
## Working with Vectors
|
||||||
|
|
||||||
### Vectors as Lists
|
### Lists
|
||||||
|
|
||||||
### `numpy` Arrays
|
If the vectors you are working with are provided as a list of floats, you can convert them into the compact BLOB format that `sqlite-vec` uses with [`struct.pack()`](https://docs.python.org/3/library/struct.html#struct.pack).
|
||||||
|
|
||||||
|
```python
|
||||||
|
import struct
|
||||||
|
|
||||||
|
def serialize(vector: List[float]) -> bytes:
|
||||||
|
""" serializes a list of floats into a compact "raw bytes" format """
|
||||||
|
return struct.pack('%sf' % len(vector), *vector)
|
||||||
|
|
||||||
|
|
||||||
|
embedding = [0.1, 0.2, 0.3, 0.4]
|
||||||
|
result = db.execute('select vec_length(?)', [serialize(embedding)]).fetchone()[0]
|
||||||
|
|
||||||
|
print(result) # 4
|
||||||
|
```
|
||||||
|
|
||||||
|
### NumPy Arrays
|
||||||
|
|
||||||
|
If your vectors are NumPy arrays, the Python SQLite package allows you to pass them along as-is. Make sure you convert your array elements to 32-bit floats with [`.astype(np.float32)`](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html), as some embedding services will use `np.float64` elements.
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
embedding = np.array([0.1, 0.2, 0.3, 0.4])
|
||||||
|
result = db.execute('select vec_length(?)', [embedding.astype(np.float32)]).fetchone()[0]
|
||||||
|
print(result) # 4
|
||||||
|
```
|
||||||
|
|
||||||
|
## Recipes
|
||||||
|
|
||||||
|
### OpenAI
|
||||||
|
|
||||||
|
https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
import sqlite3
|
||||||
|
import sqlite_vec
|
||||||
|
|
||||||
|
texts = [
|
||||||
|
|
||||||
|
'Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.',
|
||||||
|
'Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.',
|
||||||
|
'George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.',
|
||||||
|
'Alan Mathison Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher and theoretical biologist.',
|
||||||
|
'Alaqua Cox is a Native American (Menominee) actress.'
|
||||||
|
]
|
||||||
|
|
||||||
|
# change ':memory:' to a filepath to persist data
|
||||||
|
db = sqlite3.connect(':memory:')
|
||||||
|
db.enable_load_extension(True)
|
||||||
|
sqlite_vec.load(db)
|
||||||
|
db.enable_load_extension(False)
|
||||||
|
|
||||||
|
client = OpenAI()
|
||||||
|
|
||||||
|
response = client.embeddings.create(
|
||||||
|
input=[texts],
|
||||||
|
model="text-embedding-3-small"
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response.data[0].embedding)
|
||||||
|
```
|
||||||
|
|
||||||
|
### llamafile
|
||||||
|
|
||||||
|
https://github.com/Mozilla-Ocho/llamafile
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
### llama-cpp-python
|
||||||
|
|
||||||
|
https://github.com/abetlen/llama-cpp-python
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
### sentence-transformers (etc.)
|
||||||
|
|
||||||
|
https://github.com/UKPLab/sentence-transformers
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
## Using an up-to-date version of SQLite
|
## Using an up-to-date version of SQLite
|
||||||
|
|
||||||
|
Some features of `sqlite-vec` will require an up-to-date SQLite library. You can see what version of SQLite your Python environment uses with [`sqlite3.sqlite_version`](https://docs.python.org/3/library/sqlite3.html#sqlite3.sqlite_version), or with this one-line command:
|
||||||
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -c 'import sqlite3; print(sqlite3.sqlite_version)'
|
||||||
|
```
|
||||||
|
|
||||||
|
Currently, **SQLite version 3.41 or higher** is recommended but not required. `sqlite-vec` will work with older versions, but certain features and queries will only work correctly in >=3.41.
|
||||||
|
|
||||||
|
To "upgrade" the SQLite version your Python installation uses, you have a few options.
|
||||||
|
|
||||||
|
### Compile your own SQLite version
|
||||||
|
|
||||||
|
You can compile an up-to-date version of SQLite and use some system environment variables (like `LD_PRELOAD` and `DYLD_LIBRARY_PATH`) to force Python to use a different SQLite library. [This guide](https://til.simonwillison.net/sqlite/sqlite-version-macos-python) goes into this approach in more detail.
|
||||||
|
|
||||||
|
Although compiling SQLite can be straightforward, there are a lot of different compilation options to consider, which makes it confusing. This also doesn't work with Windows, which statically compiles its own SQLite library.
|
||||||
|
|
||||||
|
### Use `pysqlite3`
|
||||||
|
|
||||||
|
[`pysqlite3`](https://github.com/coleifer/pysqlite3) is a third-party PyPI package that bundles an up-to-date SQLite library as a separate pip package.
|
||||||
|
|
||||||
|
While it's mostly compatible with the Python `sqlite3` module, there are a few rare edge cases where the APIs don't match.
|
||||||
|
|
||||||
|
### Upgrading your Python version
|
||||||
|
|
||||||
|
Sometimes installing the latest version of Python will "magically" upgrade your SQLite version as well. This is a nuclear option, as upgrading Python installations can be quite the hassle, but most Python 3.12 builds will have a very recent SQLite version.
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
# Using `sqlite-vec` in Ruby
|
# Using `sqlite-vec` in Ruby
|
||||||
|
|
||||||
|
https://rubygems.org/gems/sqlite-vec
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
gem install sqlite-vec
|
gem install sqlite-vec
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -479,13 +479,12 @@ def test_vec_quantize_i8():
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
assert vec_quantize_i8() == 111
|
assert vec_quantize_i8() == 111
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="TODO")
|
@pytest.mark.skip(reason="TODO")
|
||||||
def test_vec_quantize_binary():
|
def test_vec_quantize_binary():
|
||||||
vec_quantize_binary = lambda *args: db.execute(
|
vec_quantize_binary = lambda *args, input="?": db.execute(
|
||||||
"select vec_quantize_binary()", args
|
f"select vec_quantize_binary({input})", args
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
assert vec_quantize_binary() == 111
|
assert vec_quantize_binary("[-1, -1, -1, -1, 1, 1, 1, 1]") == 111
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="TODO")
|
@pytest.mark.skip(reason="TODO")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue