Merge branch 'main' of github.com:asg017/sqlite-vec into main

This commit is contained in:
Alex Garcia 2024-07-25 11:23:00 -07:00
commit 156d6c1e3b
19 changed files with 573 additions and 46 deletions

View file

@ -8,10 +8,28 @@ python3 bench/bench.py \
``` ```
``` ```
python3 bench/bench.py \ python3 bench.py \
-n "sift1m" \ -n "sift1m" \
-i sift/sift_base.fvecs \ -i ../../sift/sift_base.fvecs \
-q sift/sift_query.fvecs \ -q ../../sift/sift_query.fvecs \
--sample 10000 --qsample 100 \ --qsample 100 \
-k 10 -k 20
``` ```
```
python3 bench.py \
-n "sift1m" \
-i ../../sift/sift_base.fvecs \
-q ../../sift/sift_query.fvecs \
--qsample 100 \
-x faiss,vec-scalar.4096,vec-static,vec-vec0.4096.16,vec-vec0.8192.1024,usearch,duckdb,hnswlib,numpy \
-k 20
```
```
python bench.py -n gist -i ../../gist/gist_base.fvecs -q ../../gist/gist_query.fvecs --qsample 100 -k 20 --sample 500000 -x faiss,vec-static,vec-scalar.8192,vec-scalar.16384,vec-scalar.32768,vec-vec0.16384.64,vec-vec0.16384.128,vec-vec0.16384.256,vec-vec0.16384.512,vec-vec0.16384.1024,vec-vec0.16384.2048
```
```
python bench.py -n gist -i ../../gist/gist_base.fvecs -q ../../gist/gist_query.fvecs --qsample 100 -k 20 --sample 500000 -x faiss,vec-static,sentence-transformers,numpy
```

View file

@ -0,0 +1,51 @@
import numpy as np
import numpy.typing as npt
import time
def cosine_similarity(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32], do_norm: bool = True
) -> npt.NDArray[np.float32]:
    """Score one query vector against every row of a matrix.

    Args:
        vec: Query vector.
        mat: Matrix whose rows are the candidate vectors.
        do_norm: When true, each dot product is divided by the product of the
            query norm and the row norm (cosine similarity). When false, the
            raw dot products are returned.

    Returns:
        One score per row of ``mat``.
    """
    scores = vec @ mat.T
    if not do_norm:
        return scores
    # Divide in place so the result keeps the dtype of the dot products.
    scores /= np.linalg.norm(vec) * np.linalg.norm(mat, axis=1)
    return scores
def topk(
    vec: npt.NDArray[np.float32],
    mat: npt.NDArray[np.float32],
    k: int = 5,
    do_norm: bool = True,
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.float32]]:
    """Return the ``k`` rows of ``mat`` most similar to ``vec``.

    Args:
        vec: Query vector.
        mat: Matrix whose rows are the candidates.
        k: Number of results wanted. Values larger than the number of rows
            are clamped to the number of rows; values <= 0 give empty results.
        do_norm: Forwarded to ``cosine_similarity``.

    Returns:
        ``(indices, scores)``: row indices into ``mat`` ordered from most to
        least similar, and the similarity of each of those rows.
    """
    sim = cosine_similarity(vec, mat, do_norm=do_norm)

    k = max(0, min(k, sim.shape[0]))
    if k == 0:
        return np.argsort(sim[:0]), sim[:0]

    # Rather than sorting all similarities and taking the top K, it's faster to
    # argpartition and then just sort the top K.
    # The difference is O(N logN) vs O(N + k logk)
    # kth=k-1 puts the k best scores in the first k slots and, unlike kth=k,
    # stays in bounds when k equals the number of rows.
    indices = np.argpartition(-sim, kth=k - 1)[:k]
    top_indices = indices[np.argsort(-sim[indices])]
    # Scores must be looked up by row index, not by position within the top-k.
    return top_indices, sim[top_indices]
def ivecs_read(fname):
    """Load a ``.ivecs``-style file into a 2-D int32 array.

    The file is read as a flat run of int32 values. The first value is taken
    as the vector dimension ``d``; the data is then viewed as rows of
    ``d + 1`` values and the first column of every row is dropped.

    Args:
        fname: Path (or open file) accepted by ``np.fromfile``.

    Returns:
        A freshly copied int32 array with ``d`` columns.
    """
    raw = np.fromfile(fname, dtype="int32")
    dim = raw[0]
    records = raw.reshape(-1, dim + 1)
    # Copy so the result is contiguous and independent of the raw buffer.
    return records[:, 1:].copy()
def fvecs_read(fname):
    """Load a ``.fvecs`` file as a 2-D float32 array.

    The file is parsed with ``ivecs_read`` and the resulting int32 payload is
    reinterpreted bit-for-bit as float32 (no numeric conversion).
    """
    as_int32 = ivecs_read(fname)
    return as_int32.view("float32")
# Brute-force NumPy baseline: time `topk` for the first 20 query vectors
# against the full base set and print the mean elapsed time per query.
base = fvecs_read("../../sift/sift_base.fvecs")
queries = fvecs_read("../../sift/sift_query.fvecs")
k = 20

times = []
results = []
for q in queries[0:20]:
    t0 = time.time()
    results.append(topk(q, base, k=k))
    # The timestamp is taken after the append, so it is part of the timing.
    times.append(time.time() - t0)

print(np.__version__)
print(np.mean(times))

View file

@ -14,6 +14,10 @@ from dataclasses import dataclass
from typing import List from typing import List
import duckdb
import pyarrow as pa
from sentence_transformers.util import semantic_search
@dataclass @dataclass
class BenchResult: class BenchResult:
@ -52,13 +56,13 @@ def topk(
def ivecs_read(fname): def ivecs_read(fname):
a = np.fromfile(fname, dtype="int32") a = np.fromfile(fname, dtype="int32",)
d = a[0] d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy() return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname, sample=None):
    """Load a ``.fvecs`` file as float32 rows, optionally truncated.

    Args:
        fname: Path accepted by ``ivecs_read``.
        sample: Maximum number of leading vectors to keep. ``None`` or any
            negative value (the CLI passes -1 for "no limit") keeps them all;
            a plain ``[:sample]`` slice would drop the last vector for -1.

    Returns:
        The int32 payload from ``ivecs_read`` reinterpreted as float32.
    """
    vectors = ivecs_read(fname).view("float32")
    if sample is None or sample < 0:
        return vectors
    return vectors[:sample]
def bench_hnsw(base, query): def bench_hnsw(base, query):
@ -80,8 +84,6 @@ def bench_hnsw(base, query):
for idx, q in enumerate(query): for idx, q in enumerate(query):
t0 = time.time() t0 = time.time()
result = p.knn_query(q, k=5) result = p.knn_query(q, k=5)
if idx < 5:
print(result[0])
results.append(result) results.append(result)
times.append(time.time() - t0) times.append(time.time() - t0)
print(time.time() - t) print(time.time() - t)
@ -131,7 +133,7 @@ def bench_sqlite_vec(base, query, page_size, chunk_size, k) -> BenchResult:
db = sqlite3.connect(":memory:") db = sqlite3.connect(":memory:")
db.execute(f"PRAGMA page_size = {page_size}") db.execute(f"PRAGMA page_size = {page_size}")
db.enable_load_extension(True) db.enable_load_extension(True)
db.load_extension("./dist/vec0") db.load_extension("../../dist/vec0")
db.execute( db.execute(
f""" f"""
create virtual table vec_sift1m using vec0( create virtual table vec_sift1m using vec0(
@ -171,12 +173,12 @@ def bench_sqlite_vec(base, query, page_size, chunk_size, k) -> BenchResult:
return BenchResult(f"sqlite-vec vec0 ({page_size}|{chunk_size})", build_time, times) return BenchResult(f"sqlite-vec vec0 ({page_size}|{chunk_size})", build_time, times)
def bench_sqlite_normal(base, query, page_size, k) -> BenchResult: def bench_sqlite_vec_scalar(base, query, page_size, k) -> BenchResult:
print(f"sqlite-normal") print(f"sqlite-vec-scalar")
db = sqlite3.connect(":memory:") db = sqlite3.connect(":memory:")
db.enable_load_extension(True) db.enable_load_extension(True)
db.load_extension("./dist/vec0") db.load_extension("../../dist/vec0")
db.execute(f"PRAGMA page_size={page_size}") db.execute(f"PRAGMA page_size={page_size}")
db.execute(f"create table sift1m(vector);") db.execute(f"create table sift1m(vector);")
@ -207,8 +209,102 @@ def bench_sqlite_normal(base, query, page_size, k) -> BenchResult:
[q.tobytes(), k], [q.tobytes(), k],
).fetchall() ).fetchall()
times.append(time.time() - t0) times.append(time.time() - t0)
return BenchResult(f"sqlite-vec normal ({page_size})", build_time, times) return BenchResult(f"sqlite-vec-scalar ({page_size})", build_time, times)
def bench_libsql(base, query, page_size, k) -> BenchResult:
    """Benchmark a full-scan cosine search using the ``vector`` SQL functions.

    Vectors are inserted into an ``f32_blob`` column of an in-memory table and
    each query orders the whole table by ``vector_distance_cos``. No vector
    index is created (see the TODO below).

    Args:
        base: 2-D array of vectors to insert; ``base.shape[1]`` is the dimension.
        query: Iterable of query vectors, each bound via ``tobytes()``.
        page_size: Value used for ``PRAGMA page_size``.
        k: ``LIMIT`` applied to every query.

    Returns:
        ``BenchResult`` labelled ``libsql (<page_size>)`` carrying the elapsed
        insert time and the list of per-query elapsed times (``time.time()``
        differences).

    Raises:
        RuntimeError: If the connected SQLite build does not list a ``vector``
            function in ``pragma_function_list``.
    """
    print("libsql")
    dimensions = base.shape[1]

    db = sqlite3.connect(":memory:")
    db.enable_load_extension(True)

    # Explicit check instead of `assert`, which disappears under `python -O`.
    has_vector = db.execute(
        "select 'vector' in (select name from pragma_function_list)"
    ).fetchone()[0]
    if has_vector != 1:
        raise RuntimeError(
            "the 'vector' SQL function is not available in this sqlite3 build"
        )

    db.execute(f"PRAGMA page_size={page_size}")
    db.execute(f"create table vectors(vector f32_blob({dimensions}));")
    # TODO: only does DiskANN?
    # db.execute("CREATE INDEX vectors_idx ON vectors (libsql_vector_idx(vector, 'metric=cosine'))")

    t = time.time()
    with db:
        # A generator avoids materialising one Python list entry per vector.
        db.executemany(
            "insert into vectors(vector) values (?)",
            ([x.tobytes()] for x in base),
        )
    build_time = time.time() - t

    times = []
    for q in query:
        t0 = time.time()
        db.execute(
            """
            select
              rowid,
              vector_distance_cos(?, vector) as distance
            FROM vectors
            order by 2
            limit ?
            """,
            [q.tobytes(), k],
        ).fetchall()
        times.append(time.time() - t0)
    return BenchResult(f"libsql ({page_size})", build_time, times)
def register_np(db, array, name):
    """Register a NumPy array as a ``vec_static_blob_entries`` virtual table.

    The array's raw data address, element type string and shape are read from
    ``__array_interface__`` and passed to ``vec_static_blob_from_raw``; a row
    is inserted into ``temp.vec_static_blobs`` and a virtual table called
    ``name`` is created on top of it.

    Args:
        db: ``sqlite3`` connection with the needed SQL functions loaded.
        array: 2-D little-endian float32 array (typestr ``"<f4"`` is asserted).
        name: Name for both the static blob and the virtual table.

    NOTE(review): only the address is passed to SQLite, so presumably the
    array must stay alive and be densely laid out while the table is used.
    Confirm against the extension.
    """
    interface = array.__array_interface__
    address = interface["data"][0]
    row_count, width = interface["shape"]
    typestr = interface["typestr"]
    assert typestr == "<f4"

    # printf('%w') escapes the name for use inside a double-quoted identifier.
    quoted = db.execute("select printf('%w', ?)", [name]).fetchone()[0]

    db.execute(
        "insert into temp.vec_static_blobs(name, data) select ?, vec_static_blob_from_raw(?, ?, ?, ?)",
        [name, address, typestr, width, row_count],
    )
    db.execute(
        f'create virtual table "{quoted}" using vec_static_blob_entries({quoted})'
    )
def bench_sqlite_vec_static(base, query, k) -> BenchResult:
    """Benchmark KNN queries against ``base`` registered as a static blob table.

    The "build" step only registers the array with ``register_np`` under the
    table name ``base``; each query then runs a ``vector match ? and k = ?``
    search ordered by distance.

    Args:
        base: Array handed to ``register_np``.
        query: Iterable of query vectors, each bound via ``tobytes()``.
        k: Number of neighbours requested per query.

    Returns:
        ``BenchResult`` labelled ``sqlite-vec static`` with the elapsed
        registration time and the per-query elapsed times.
    """
    print(f"sqlite-vec static")

    db = sqlite3.connect(":memory:")
    db.enable_load_extension(True)
    db.load_extension("../../dist/vec0")

    started = time.time()
    register_np(db, base, "base")
    build_time = time.time() - started

    knn_sql = """
            select
              rowid
            from base
            where vector match ?
              and k = ?
            order by distance
            """
    times = []
    for q in query:
        t0 = time.time()
        db.execute(knn_sql, [q.tobytes(), k]).fetchall()
        times.append(time.time() - t0)
    return BenchResult(f"sqlite-vec static", build_time, times)
def bench_faiss(base, query, k) -> BenchResult: def bench_faiss(base, query, k) -> BenchResult:
dimensions = base.shape[1] dimensions = base.shape[1]
@ -246,6 +342,45 @@ def bench_lancedb(base, query, k) -> BenchResult:
times.append(time.time() - t0) times.append(time.time() - t0)
return BenchResult("lancedb", build_time, times) return BenchResult("lancedb", build_time, times)
def bench_duckdb(base, query, k) -> BenchResult:
    """Benchmark a full-scan cosine-similarity search in an in-memory DuckDB.

    Args:
        base: 2-D array of vectors; ``base.shape[1]`` fixes the array column
            width of table ``t``.
        query: Iterable of query vectors, bound directly as SQL parameters.
        k: ``LIMIT`` applied to every query.

    Returns:
        ``BenchResult`` labelled ``duckdb`` with the elapsed load time (Arrow
        conversion plus INSERT) and the per-query elapsed times.
    """
    dimensions = base.shape[1]
    db = duckdb.connect(":memory:")
    db.execute(f"CREATE TABLE t(vector float[{dimensions}])")

    build_start = time.time()
    # The INSERT below names this local inside the SQL text (`FROM pa_base`);
    # DuckDB resolves it from the Python variable, so it must keep this name.
    pa_base = pa.Table.from_arrays([pa.array(list(base))], names=['vector'])
    db.execute(f"INSERT INTO t(vector) SELECT vector::float[{dimensions}] FROM pa_base")
    build_time = time.time() - build_start

    # The statement text does not depend on the query vector, so build it once.
    search_sql = f"""
            SELECT
              rowid,
              array_cosine_similarity(vector, ?::float[{dimensions}])
            FROM t
            ORDER BY 2 DESC
            LIMIT ?
            """
    times = []
    for q in query:
        t0 = time.time()
        db.execute(search_sql, [q, k]).fetchall()
        times.append(time.time() - t0)
    return BenchResult("duckdb", build_time, times)
def bench_sentence_transformers(base, query, k) -> BenchResult:
    """Benchmark ``sentence_transformers.util.semantic_search`` over ``base``.

    There is no build step: the recorded build time is just the gap between
    two consecutive ``time.time()`` calls.

    Args:
        base: Corpus vectors passed to ``semantic_search``.
        query: Iterable of query vectors, searched one at a time.
        k: Passed as ``top_k``.

    Returns:
        ``BenchResult`` labelled ``sentence-transformers``.
    """
    print("sentence-transformers")
    dimensions = base.shape[1]

    build_start = time.time()
    build_time = time.time() - build_start

    times = []
    for q in query:
        started = time.time()
        semantic_search(q, base, top_k=k)
        times.append(time.time() - started)
    return BenchResult("sentence-transformers", build_time, times)
# def bench_chroma(base, query, k): # def bench_chroma(base, query, k):
# chroma_client = chromadb.Client() # chroma_client = chromadb.Client()
@ -297,23 +432,65 @@ from rich.console import Console
from rich.table import Table from rich.table import Table
def suite(name, base, query, k): def suite(name, base, query, k, benchmarks):
print(f"Starting benchmark suite: {name} {base.shape}, k={k}") print(f"Starting benchmark suite: {name} {base.shape}, k={k}")
results = [] results = []
# n = bench_chroma(base[:40000], query, k=k)
# n = bench_usearch_npy(base, query, k=k) for b in benchmarks.split(","):
# n = bench_usearch_special(base, query, k=k) if b == "faiss":
results.append(bench_faiss(base, query, k=k)) results.append(bench_faiss(base, query, k=k))
results.append(bench_hnsw_bf(base, query, k=k)) elif b == "vec-static":
# n = bench_sqlite_vec(base, query, 4096, 1024, k=k) results.append(bench_sqlite_vec_static(base, query, k=k))
# n = bench_sqlite_vec(base, query, 32768, 1024, k=k) elif b.startswith("vec-scalar"):
results.append(bench_sqlite_vec(base, query, 32768, 256, k=k)) _, page_size = b.split('.')
# n = bench_sqlite_vec(base, query, 16384, 64, k=k) results.append(bench_sqlite_vec_scalar(base, query, page_size, k=k))
# n = bench_sqlite_vec(base, query, 16384, 32, k=k) elif b.startswith("libsql"):
results.append(bench_sqlite_normal(base, query, 8192, k=k)) _, page_size = b.split('.')
results.append(bench_lancedb(base, query, k=k)) results.append(bench_libsql(base, query, page_size, k=k))
results.append(bench_numpy(base, query, k=k)) elif b.startswith("vec-vec0"):
# h = bench_hnsw(base, query) _, page_size, chunk_size = b.split('.')
results.append(bench_sqlite_vec(base, query, int(page_size), int(chunk_size), k=k))
elif b == "usearch":
results.append(bench_usearch_npy(base, query, k=k))
elif b == "hnswlib":
results.append(bench_hnsw_bf(base, query, k=k))
elif b == "numpy":
results.append(bench_numpy(base, query, k=k))
elif b == "duckdb":
results.append(bench_duckdb(base, query, k=k))
elif b == "sentence-transformers":
results.append(bench_sentence_transformers(base, query, k=k))
else:
raise Exception(f"unknown benchmark {b}")
#results.append(bench_sqlite_vec(base, query, 32768, 512, k=k))
#results.append(bench_sqlite_vec(base, query, 32768, 256, k=k))
#results.append(bench_sqlite_vec_expo(base, query, k=k))
# n = bench_chroma(base[:40000], query, k=k)
# n = bench_usearch_special(base, query, k=k)
# n = bench_sqlite_vec(base, query, 4096, 1024, k=k)
# n = bench_sqlite_vec(base, query, 32768, 1024, k=k)
# blessed
### #for pgsz in [4096, 8192, 16384, 32768, 65536]:
### # for chunksz in [8, 32, 128, 512, 1024, 2048]:
### # results.append(bench_sqlite_vec(base, query, pgsz, chunksz, k=k))
### # n = bench_sqlite_vec(base, query, 16384, 64, k=k)
### # n = bench_sqlite_vec(base, query, 16384, 32, k=k)
### results.append(bench_sqlite_normal(base, query, 8192, k=k))
### results.append(bench_lancedb(base, query, k=k))
### #h = bench_hnsw(base, query)
table = Table( table = Table(
title=f"{name}: {base.shape[0]:,} {base.shape[1]}-dimension vectors, k={k}" title=f"{name}: {base.shape[0]:,} {base.shape[1]}-dimension vectors, k={k}"
@ -322,7 +499,7 @@ def suite(name, base, query, k):
table.add_column("Tool") table.add_column("Tool")
table.add_column("Build Time (ms)", justify="right") table.add_column("Build Time (ms)", justify="right")
table.add_column("Query time (ms)", justify="right") table.add_column("Query time (ms)", justify="right")
for res in results: for res in sorted(results, key=lambda x: np.mean(x.query_times_ms)):
table.add_row( table.add_row(
res.tool, duration(res.build_time_ms), duration(np.mean(res.query_times_ms)) res.tool, duration(res.build_time_ms), duration(np.mean(res.query_times_ms))
) )
@ -354,6 +531,7 @@ def parse_args():
type=int, type=int,
required=False, required=False,
help="Number of entries in base to use. Defaults all", help="Number of entries in base to use. Defaults all",
default=-1
) )
parser.add_argument( parser.add_argument(
"--qsample", "--qsample",
@ -361,6 +539,9 @@ def parse_args():
required=False, required=False,
help="Number of queries to use. Defaults all", help="Number of queries to use. Defaults all",
) )
parser.add_argument(
"-x", help="type of runs to make", default="faiss,vec-scalar.4096,vec-static,vec-vec0.4096.16,usearch,duckdb,hnswlib,numpy"
)
args = parser.parse_args() args = parser.parse_args()
return args return args
@ -369,35 +550,27 @@ def parse_args():
from pathlib import Path from pathlib import Path
def cli_read_input(input, sample):
    """Load the vectors named on the command line, optionally truncated.

    Args:
        input: Path to a ``.fvecs`` or ``.npy`` file.
        sample: Maximum number of leading vectors to return. ``None`` or a
            negative value (the ``--sample`` default is -1) means "all".

    Returns:
        The loaded vectors; ``.npy`` data is returned as float32.

    Raises:
        Exception: ``("unknown filetype", input)`` for any other suffix.
    """
    input_path = Path(input)
    # Map "no limit" to None so slicing keeps every row; [:-1] would silently
    # drop the last vector.
    limit = None if sample is None or sample < 0 else sample

    if input_path.suffix == ".fvecs":
        return fvecs_read(input_path, limit)
    if input_path.suffix == ".npy":
        # np.load parses the .npy header; np.fromfile would read it as data.
        return np.load(input_path).astype("float32", copy=False)[:limit]
    raise Exception("unknown filetype", input)
def cli_read_query(query, base): def cli_read_query(query, base):
if query is None: if query is None:
return base[np.random.choice(base.shape[0], 100, replace=False), :] return base[np.random.choice(base.shape[0], 100, replace=False), :]
return cli_read_input(query) return cli_read_input(query, -1)
def main(): def main():
args = parse_args() args = parse_args()
base = cli_read_input(args.input)[: args.sample] print(args)
base = cli_read_input(args.input, args.sample)
queries = cli_read_query(args.query, base)[: args.qsample] queries = cli_read_query(args.query, base)[: args.qsample]
suite(args.name, base, queries, args.k) suite(args.name, base, queries, args.k, args.x)
from sys import argv
# base = fvecs_read("sift/sift_base.fvecs") # [:100000]
# query = fvecs_read("sift/sift_query.fvecs")[:100]
# print(base.shape)
# k = int(argv[1]) if len(argv) > 1 else 5
# suite("sift1m", base, query, k)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -0,0 +1,3 @@
#!/bin/bash
# Run bench.py on the GIST dataset (750,000 base vectors, 200 queries, k=20).
# $1: comma-separated benchmark list forwarded to `bench.py -x`.
# Quoted so the list reaches bench.py as one argument; a missing argument
# stops here with a usage message instead of a bare `-x`.
python bench.py -n gist -i ../../gist/gist_base.fvecs -q ../../gist/gist_query.fvecs --sample 750000 --qsample 200 -k 20 -x "${1:?usage: $0 <benchmark[,benchmark...]>}"

View file

@ -0,0 +1,3 @@
#!/bin/bash
# Run bench.py on the sift1m dataset (100 queries, k=20).
# $1: comma-separated benchmark list forwarded to `bench.py -x`.
# Quoted so the list reaches bench.py as one argument; a missing argument
# stops here with a usage message instead of a bare `-x`.
python bench.py -n sift1m -i ../../sift/sift_base.fvecs -q ../../sift/sift_query.fvecs --qsample 100 -k 20 -x "${1:?usage: $0 <benchmark[,benchmark...]>}"

View file

@ -0,0 +1,18 @@
@name=sift1m
@i=../../sift/sift_base.fvecs
@q=../../sift/sift_query.fvecs
@qsample=100
libsql.4096
libsql.8192
faiss
vec-scalar.4096
vec-static
vec-vec0.4096.16
vec-vec0.8192.1024
vec-vec0.4096.2048
usearch
duckdb
hnswlib
numpy

2
tests/fuzz/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
*.dSYM
targets/

48
tests/fuzz/Makefile Normal file
View file

@ -0,0 +1,48 @@
# Directory that receives the compiled fuzz targets.
TARGET_DIR=./targets

# Compiler used for every target. Defaults to the Homebrew LLVM clang the
# recipes were written against; override with e.g. `make FUZZ_CC=clang all`.
FUZZ_CC ?= /opt/homebrew/opt/llvm/bin/clang

# Flags shared by every target: ASan + libFuzzer instrumentation, debug info,
# and the include paths for sqlite-vec and the vendored SQLite.
FUZZ_CFLAGS = -fsanitize=address,fuzzer -I ../../ -I ../../vendor -DSQLITE_CORE -g

# Sources compiled into every target alongside the harness itself.
FUZZ_SOURCES = ../../vendor/sqlite3.c ../../sqlite-vec.c

.PHONY: all clean

$(TARGET_DIR):
	mkdir -p $@

# The directory is an order-only prerequisite (after `|`) so that its changing
# mtime does not force every target to rebuild.

# ASAN_OPTIONS=detect_leaks=1 ./fuzz_json -detect_leaks=1 '-trace_malloc=[12]' tmp
$(TARGET_DIR)/json: json.c ../../sqlite-vec.c | $(TARGET_DIR)
	$(FUZZ_CC) \
		$(FUZZ_CFLAGS) \
		$(FUZZ_SOURCES) \
		$< \
		-o $@

$(TARGET_DIR)/vec0_create: vec0-create.c ../../sqlite-vec.c | $(TARGET_DIR)
	$(FUZZ_CC) \
		$(FUZZ_CFLAGS) \
		$(FUZZ_SOURCES) \
		$< \
		-o $@

$(TARGET_DIR)/numpy: numpy.c ../../sqlite-vec.c | $(TARGET_DIR)
	$(FUZZ_CC) \
		$(FUZZ_CFLAGS) \
		$(FUZZ_SOURCES) \
		$< \
		-o $@

$(TARGET_DIR)/exec: exec.c ../../sqlite-vec.c | $(TARGET_DIR)
	$(FUZZ_CC) \
		$(FUZZ_CFLAGS) \
		$(FUZZ_SOURCES) \
		$< \
		-o $@

# Build every fuzz target (the original list named json twice and left out
# vec0_create).
all: $(TARGET_DIR)/json $(TARGET_DIR)/numpy $(TARGET_DIR)/vec0_create $(TARGET_DIR)/exec

clean:
	rm -rf $(TARGET_DIR)/*

15
tests/fuzz/README.md Normal file
View file

@ -0,0 +1,15 @@
```
ASAN_OPTIONS=detect_leaks=1 ./targets/vec0_create \
-dict=./vec0-create.dict -max_total_time=5 \
./corpus/vec0-create
```
```
export PATH="/opt/homebrew/opt/llvm/bin:$PATH"
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
LDFLAGS="-L/opt/homebrew/opt/llvm/lib/c++ -Wl,-rpath,/opt/homebrew/opt/llvm/lib/c++"
```

View file

@ -0,0 +1 @@
aaa float[12]

View file

@ -0,0 +1 @@
aaa float[12], bbb int8[6]

30
tests/fuzz/exec.c Normal file
View file

@ -0,0 +1,30 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/*
 * libFuzzer entry point: execute the fuzz input as SQL text on a fresh
 * in-memory database that has sqlite-vec registered.
 *
 * data/size: raw fuzz input; it is copied into a NUL-terminated string first.
 * Always returns 0; errors from sqlite3_exec() are intentionally ignored.
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  int rc = SQLITE_OK;
  sqlite3 *db;
  char *zSrc;

  /* The "%.*s" precision below is an int, so skip inputs that do not fit. */
  if (size < 1 || size > 0x7fffffff) return 0;

  rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  /* Passing a size_t through varargs where an int is expected is undefined
   * behaviour, hence the explicit cast. */
  zSrc = sqlite3_mprintf("%.*s", (int)size, data);
  assert(zSrc);

  sqlite3_exec(db, zSrc, NULL, NULL, NULL);

  sqlite3_free(zSrc);
  sqlite3_close(db);
  return 0;
}

21
tests/fuzz/exec.dict Normal file
View file

@ -0,0 +1,21 @@
select="select"
from="from"
cname1="aaa"
cname1="bbb"
cname1="ccc"
type1="float"
type2="int8"
type3="bit"
lparen="["
rparen="]"
pk="primary key"
text="text"
distance_metric="distance_metric"
eq="="
l1="l1"
l2="l2"
cosine="cosine"
hamming="hamming"
vec_distance_l2="vec_distance_l2"
vec_distance_l1="vec_distance_l1"
comma=","

34
tests/fuzz/json.c Normal file
View file

@ -0,0 +1,34 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/*
 * libFuzzer entry point: bind the fuzz input as a blob, cast it to TEXT in
 * SQL and pass it to vec_f32() on a fresh in-memory database.
 *
 * sqlite-vec is registered directly on this connection via sqlite3_vec_init()
 * (rather than through sqlite3_auto_extension()).
 * Always returns 0; the result of sqlite3_step() is ignored.
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  sqlite3 *conn;
  sqlite3_stmt *query;
  int status;

  status = sqlite3_open(":memory:", &conn);
  assert(status == SQLITE_OK);

  status = sqlite3_vec_init(conn, NULL, NULL);
  assert(status == SQLITE_OK);

  status = sqlite3_prepare_v2(conn, "SELECT vec_f32(cast(? as text))", -1,
                              &query, NULL);
  assert(status == SQLITE_OK);

  /* SQLITE_STATIC: the fuzz buffer outlives the statement, no copy needed. */
  sqlite3_bind_blob(query, 1, data, size, SQLITE_STATIC);
  sqlite3_step(query);

  sqlite3_finalize(query);
  sqlite3_close(conn);
  return 0;
}

42
tests/fuzz/numpy.c Normal file
View file

@ -0,0 +1,42 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/*
 * libFuzzer entry point: bind the fuzz input as a blob to vec_npy_each() and
 * step through every row it produces.
 *
 * Returns 0 when the statement runs to SQLITE_DONE, and -1 when stepping
 * fails (asking libFuzzer not to add the input to the corpus). Only 0 and -1
 * are returned; libFuzzer reserves other values.
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  int rc = SQLITE_OK;
  sqlite3 *db;
  sqlite3_stmt *stmt;

  rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  rc = sqlite3_prepare_v2(db, "select * from vec_npy_each(?)", -1, &stmt, NULL);
  assert(rc == SQLITE_OK);
  sqlite3_bind_blob(stmt, 1, data, (int)size, SQLITE_STATIC);

  /* Step until the statement stops yielding rows. The previous check
   * (rc != DONE || rc != ROW) was always true, so no input got past the
   * first step, and the loop behind it never stepped again. */
  do {
    rc = sqlite3_step(stmt);
  } while (rc == SQLITE_ROW);

  sqlite3_finalize(stmt);
  sqlite3_close(db);
  return rc == SQLITE_DONE ? 0 : -1;
}

7
tests/fuzz/numpy.dict Normal file
View file

@ -0,0 +1,7 @@
magic="\x93NUMPY"
lparen="("
rparen=")"
lbrace="{"
rbrace="}"
sq1="\""
sq2="'"

37
tests/fuzz/vec0-create.c Normal file
View file

@ -0,0 +1,37 @@
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite-vec.h"
#include "sqlite3.h"
#include <assert.h>
/*
 * libFuzzer entry point: splice the fuzz input into
 * "CREATE VIRTUAL TABLE v USING vec0(<input>)" and run it on a fresh
 * in-memory database that has sqlite-vec registered.
 *
 * Always returns 0; prepare/step failures are expected and ignored.
 */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  int rc = SQLITE_OK;
  sqlite3 *db;
  sqlite3_stmt *stmt;
  sqlite3_str *s;
  char *zSql;

  /* The "%.*s" precision below is an int, so skip inputs that do not fit. */
  if (size > 0x7fffffff) return 0;

  rc = sqlite3_open(":memory:", &db);
  assert(rc == SQLITE_OK);
  rc = sqlite3_vec_init(db, NULL, NULL);
  assert(rc == SQLITE_OK);

  s = sqlite3_str_new(NULL);
  assert(s);
  sqlite3_str_appendall(s, "CREATE VIRTUAL TABLE v USING vec0(");
  /* Cast: a size_t passed through varargs where an int is expected is
   * undefined behaviour. */
  sqlite3_str_appendf(s, "%.*s", (int)size, data);
  sqlite3_str_appendall(s, ")");
  zSql = sqlite3_str_finish(s);
  assert(zSql);

  rc = sqlite3_prepare_v2(db, zSql, -1, &stmt, NULL);
  sqlite3_free(zSql);
  if (rc == SQLITE_OK) {
    sqlite3_step(stmt);
  }
  /* stmt is NULL when prepare failed; sqlite3_finalize(NULL) is a no-op. */
  sqlite3_finalize(stmt);
  sqlite3_close(db);
  return 0;
}

View file

@ -0,0 +1,16 @@
cname1="aaa"
cname1="bbb"
cname1="ccc"
type1="float"
type2="int8"
type3="bit"
lparen="["
rparen="]"
pk="primary key"
text="text"
distance_metric="distance_metric"
eq="="
l1="l1"
l2="l2"
cosine="cosine"
hamming="hamming"

View file

@ -0,0 +1,7 @@
.load dist/vec0
.mode box
.header on
.eqp on
.echo on
create virtual table v using vec0(y);