PARTITION KEY support (#122)

* initial pass at PARTITION KEY support.

* unit tests

* gha this PR branch

* fixup tests

* doc internal

* fix tests, KNN/rowids in

* define SQLITE_INDEX_CONSTRAINT_OFFSET

* whoops

* update tests, syrupy, use uv

* un ignore pyproject.toml

* dot

* tests/

* type error?

* win: .exe, update error name

* try fix macos python, paren around expr?

* win bash?

* dbg :(

* explicit error

* op

* dbg win

* win ./tests/.venv/Scripts/python.exe

* block UPDATEs on partition key values for now
This commit is contained in:
Alex Garcia 2024-11-20 00:02:04 -08:00 committed by GitHub
parent ee3654701f
commit 6658624172
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 1522 additions and 245 deletions

View file

@ -3,6 +3,7 @@ on:
push: push:
branches: branches:
- main - main
- partition-by
permissions: permissions:
contents: read contents: read
jobs: jobs:
@ -10,16 +11,92 @@ jobs:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - uses: astral-sh/setup-uv@v3
with: with:
python-version: "3.12" enable-cache: true
- run: ./scripts/vendor.sh - run: ./scripts/vendor.sh
- run: make loadable static - run: make loadable static
- run: pip install pytest numpy; make test-loadable - run: uv sync --directory tests
- run: make test-loadable python=./tests/.venv/bin/python
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: sqlite-vec-linux-x86_64-extension name: sqlite-vec-linux-x86_64-extension
path: dist/* path: dist/*
# macOS x86_64 job: build the loadable and static artifacts, run the Python
# test suite, then upload everything under dist/.
build-macos-x86_64-extension:
runs-on: macos-12
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v3
with:
enable-cache: true
# Explicitly install Python 3.12 through uv; of the jobs visible here, only
# this one does so.
- run: uv python install 3.12
- run: ./scripts/vendor.sh
- run: make loadable static
# Install the test dependencies declared in the tests/ project.
- run: uv sync --directory tests
# Run the tests with the interpreter from the tests/.venv environment.
- run: make test-loadable python=./tests/.venv/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-x86_64-extension
path: dist/*
# macOS aarch64 job: build the loadable and static artifacts, run the Python
# test suite, then upload everything under dist/.
build-macos-aarch64-extension:
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v3
with:
enable-cache: true
- run: ./scripts/vendor.sh
- run: make loadable static
# Install the test dependencies declared in the tests/ project.
- run: uv sync --directory tests
# Run the tests with the interpreter from the tests/.venv environment.
- run: make test-loadable python=./tests/.venv/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-aarch64-extension
path: dist/*
# Windows x86_64 job: compile vec0.dll with MSVC, run the Python test suite,
# then upload everything under dist/.
build-windows-x86_64-extension:
runs-on: windows-2019
steps:
- uses: actions/checkout@v4
# NOTE(review): presumably makes cl.exe available to later steps - confirm.
- uses: ilammy/msvc-dev-cmd@v1
- uses: astral-sh/setup-uv@v3
with:
enable-cache: true
# vendor.sh is a shell script, so this step is pinned to bash.
- run: ./scripts/vendor.sh
shell: bash
- run: make sqlite-vec.h
- run: mkdir dist
# The DLL is built by invoking cl.exe directly rather than `make loadable`.
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
- run: uv sync --directory tests
# On Windows the virtualenv interpreter is at Scripts/python.exe, not
# bin/python as in the other jobs.
- run: make test-loadable python=./tests/.venv/Scripts/python.exe
shell: bash
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-windows-x86_64-extension
path: dist/*
# Linux aarch64 job: cross-compile the loadable and static artifacts on an
# x86_64 runner and upload them. No tests run here because the binaries cannot
# execute on the build host.
build-linux-aarch64-extension:
  runs-on: ubuntu-latest
  steps:
    # Start the energy measurement before any other step.
    - uses: green-coding-solutions/eco-ci-energy-estimation@v4
      with:
        task: start-measurement
    # actions/checkout defines no `version` input, so the stray
    # `with: version: "latest"` block was removed.
    - uses: actions/checkout@v4
    # The artifact is published as "linux-aarch64", so it has to be built with
    # the 64-bit ARM cross toolchain (aarch64-linux-gnu), not the 32-bit
    # arm-linux-gnueabihf one.
    - run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu
    - run: ./scripts/vendor.sh
    - run: make sqlite-vec.h
    - run: make CC=aarch64-linux-gnu-gcc loadable static
    - uses: actions/upload-artifact@v4
      with:
        name: sqlite-vec-linux-aarch64-extension
        path: dist/*
    # Collect and print the energy figures gathered since start-measurement.
    - uses: green-coding-solutions/eco-ci-energy-estimation@v4
      with:
        task: get-measurement
        label: "all"
    - uses: green-coding-solutions/eco-ci-energy-estimation@v4
      with:
        task: display-results
build-android-extensions: build-android-extensions:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
@ -98,58 +175,6 @@ jobs:
with: with:
name: sqlite-vec-${{ matrix.platforms.name }}-extension name: sqlite-vec-${{ matrix.platforms.name }}-extension
path: dist/* path: dist/*
build-macos-x86_64-extension:
runs-on: macos-12
steps:
- uses: actions/checkout@v4
- run: ./scripts/vendor.sh
- run: make loadable static
- run: /usr/local/opt/python@3/libexec/bin/python -m pip install --break-system-packages pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-x86_64-extension
path: dist/*
build-macos-aarch64-extension:
runs-on: macos-14
steps:
- uses: actions/checkout@v4
- run: ./scripts/vendor.sh
- run: make loadable static
- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-macos-aarch64-extension
path: dist/*
build-windows-x86_64-extension:
runs-on: windows-2019
steps:
- uses: actions/checkout@v4
- uses: ilammy/msvc-dev-cmd@v1
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- run: ./scripts/vendor.sh
shell: bash
- run: make sqlite-vec.h
- run: mkdir dist
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
- run: pip install pytest numpy; make test-loadable
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-windows-x86_64-extension
path: dist/*
build-linux-aarch64-extension:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get install gcc-arm-linux-gnueabihf
- run: ./scripts/vendor.sh
- run: make sqlite-vec.h
- run: make CC=arm-linux-gnueabihf-gcc loadable static
- uses: actions/upload-artifact@v4
with:
name: sqlite-vec-linux-aarch64-extension
path: dist/*
build-wasm32-emscripten: build-wasm32-emscripten:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:

1
.gitignore vendored
View file

@ -26,4 +26,3 @@ sqlite-vec.h
tmp/ tmp/
poetry.lock poetry.lock
pyproject.toml

54
ARCHITECTURE.md Normal file
View file

@ -0,0 +1,54 @@
## `vec0`
### idxStr
The `vec0` idxStr is a string composed of a single "header" character and 0 or
more "blocks" of 4 characters each.
The "header" character denotes the type of query plan, as determined by the
`enum vec0_query_plan` values. The current possible values are:
| Name | Value | Description |
| -------------------------- | ----- | ---------------------------------------------------------------------- |
| `VEC0_QUERY_PLAN_FULLSCAN` | `'1'` | Perform a full-scan on all rows |
| `VEC0_QUERY_PLAN_POINT` | `'2'` | Perform a single-lookup point query for the provided rowid |
| `VEC0_QUERY_PLAN_KNN` | `'3'` | Perform a KNN-style query on the provided query vector and parameters. |
Each 4-character "block" is associated with a corresponding value in `argv[]`. For example, the 1st block, at byte offsets `1-4` (inclusive), is associated with `argv[1]`; the 2nd block, at byte offsets `5-8` (inclusive), is associated with `argv[2]`; and so on. Each block describes what kind of value or filter the given `argv[i]` value is.
#### `VEC0_IDXSTR_KIND_KNN_MATCH` (`'{'`)
`argv[i]` is the query vector of the KNN query.
The remaining 3 characters of the block are `_` fillers.
#### `VEC0_IDXSTR_KIND_KNN_K` (`'}'`)
`argv[i]` is the limit/k value of the KNN query.
The remaining 3 characters of the block are `_` fillers.
#### `VEC0_IDXSTR_KIND_KNN_ROWID_IN` (`'['`)
`argv[i]` is the optional `rowid in (...)` value, and must be handled with [`sqlite3_vtab_in_first()` /
`sqlite3_vtab_in_next()`](https://www.sqlite.org/c3ref/vtab_in_first.html).
The remaining 3 characters of the block are `_` fillers.
#### `VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT` (`']'`)
`argv[i]` is a "constraint" on a specific partition key.
The second character of the block denotes which partition key to filter on, using `A` to denote the first partition key column, `B` for the second, etc. It is encoded with `'A' + partition_idx` and can be decoded with `c - 'A'`.
The third character of the block denotes which operator is used in the constraint. It will be one of the values of `enum vec0_partition_operator`, as only a subset of operations are supported on partition keys.
The fourth character of the block is a `_` filler.
#### `VEC0_IDXSTR_KIND_POINT_ID` (`'!'`)
`argv[i]` is the value of the rowid or id to match against for the point query.
The remaining 3 characters of the block are `_` fillers.

View file

@ -185,13 +185,16 @@ publish-release:
# -k test_vec0_update # -k test_vec0_update
test-loadable: loadable test-loadable: loadable
$(PYTHON) -m pytest -vv -s -x tests/test-loadable.py $(PYTHON) -m pytest -vv -s -x tests/test-*.py
test-loadable-snapshot-update: loadable test-loadable-snapshot-update: loadable
$(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update $(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update
test-loadable-watch: test-loadable-watch:
watchexec -w sqlite-vec.c -w tests/test-loadable.py -w Makefile --clear -- make test-loadable watchexec --exts c,py,Makefile --clear -- make test-loadable
test-unit:
$(CC) tests/test-unit.c sqlite-vec.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
site-dev: site-dev:
npm --prefix site run dev npm --prefix site run dev

5
TODO Normal file
View file

@ -0,0 +1,5 @@
# partition
- [ ] UPDATE on partition key values
- remove previous row from chunk, insert into new one?
- [ ] proper sqlite3_vtab_nochange / sqlite3_value_nochange handling

File diff suppressed because it is too large Load diff

49
test.sql Normal file
View file

@ -0,0 +1,49 @@
-- Scratch script that exercises vec0 tables, with and without PARTITION KEY
-- columns, through sqlite3 shell dot-commands and plain SQL.
.load dist/vec0
.echo on
.bail on
.mode qbox
-- Minimal table: a single 1-dimensional float vector column.
create virtual table v using vec0(a float[1]);
select count(*) from v_chunks;
insert into v(a) values ('[1.11]');
select * from v;
drop table v;
-- Two vector columns interleaved with two partition key columns (int and text).
create virtual table v using vec0(
v_aaa float[1],
partk_xxx int partition key,
v_bbb float[2],
partk_yyy text partition key,
chunk_size=32
);
-- Rows 1 and 2 share both partition key values; row 3 differs in partk_yyy.
insert into v(rowid, v_aaa, partk_xxx, v_bbb, partk_yyy) values
(1, '[.1]', 999, '[.11, .11]', 'alex'),
(2, '[.2]', 999, '[.22, .22]', 'alex'),
(3, '[.3]', 999, '[.33, .33]', 'brian');
select rowid, vec_to_json(v_aaa), partk_xxx, vec_to_json(v_bbb), partk_yyy from v;
select * from v;
select * from v where rowid = 2;
-- Update only the vector columns of rowid 2; the partition key columns are
-- left out of the SET list.
update v
set v_aaa = '[.222]',
v_bbb = '[.222, .222]'
where rowid = 2;
select rowid, vec_to_json(v_aaa), partk_xxx, vec_to_json(v_bbb), partk_yyy from v;
-- Inspect the chunks shadow table, including its per-chunk partition columns.
select chunk_id, size, sequence_id, partition00, partition01, (validity), length(rowids) from v_chunks;
-- KNN query constrained by equality on both partition key columns.
--explain query plan
select *, distance
from v
where v_aaa match '[.5]'
and partk_xxx = 999
and partk_yyy = 'alex'
--and partk_xxx != 20
and k = 5;

1
tests/.python-version Normal file
View file

@ -0,0 +1 @@
3.12

View file

@ -0,0 +1,245 @@
# serializer version: 1
# name: test_constructor_limit[max 4 partition keys]
dict({
'error': 'OperationalError',
'message': 'vec0 constructor error: More than 4 partition key columns were provided',
})
# ---
# name: test_normal[1 row]
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
OrderedDict({
'chunk_id': 1,
'size': 8,
'sequence_id': None,
'partition00': 100,
'validity': b'\x01',
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
OrderedDict({
'rowid': 1,
'id': None,
'chunk_id': 1,
'chunk_offset': 0,
}),
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
OrderedDict({
'rowid': 1,
'vectors': b'\x11"3D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
})
# ---
# name: test_normal[2 rows, same parition]
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
OrderedDict({
'chunk_id': 1,
'size': 8,
'sequence_id': None,
'partition00': 100,
'validity': b'\x03',
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
OrderedDict({
'rowid': 1,
'id': None,
'chunk_id': 1,
'chunk_offset': 0,
}),
OrderedDict({
'rowid': 2,
'id': None,
'chunk_id': 1,
'chunk_offset': 1,
}),
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
OrderedDict({
'rowid': 1,
'vectors': b'\x11"3DDUfw\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
})
# ---
# name: test_normal[3 rows, 2 partitions]
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
OrderedDict({
'chunk_id': 1,
'size': 8,
'sequence_id': None,
'partition00': 100,
'validity': b'\x03',
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
OrderedDict({
'chunk_id': 2,
'size': 8,
'sequence_id': None,
'partition00': 200,
'validity': b'\x01',
'rowids': b'\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
OrderedDict({
'rowid': 1,
'id': None,
'chunk_id': 1,
'chunk_offset': 0,
}),
OrderedDict({
'rowid': 2,
'id': None,
'chunk_id': 1,
'chunk_offset': 1,
}),
OrderedDict({
'rowid': 3,
'id': None,
'chunk_id': 2,
'chunk_offset': 0,
}),
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
OrderedDict({
'rowid': 1,
'vectors': b'\x11"3DDUfw\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
OrderedDict({
'rowid': 2,
'vectors': b'\x88\x99\xaa\xbb\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
})
# ---
# name: test_types[1. raises type error]
dict({
'error': 'OperationalError',
'message': 'Parition key type mismatch: The partition key column p1 has type INTEGER, but TEXT was provided.',
})
# ---
# name: test_types[2. empty DB]
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
]),
}),
})
# ---
# name: test_types[3. allow nulls]
OrderedDict({
'sql': 'insert into v(p1, a) values(?, ?)',
'rows': list([
]),
})
# ---
# name: test_types[4. show NULL partition key]
dict({
'v_chunks': OrderedDict({
'sql': 'select * from v_chunks',
'rows': list([
OrderedDict({
'chunk_id': 1,
'size': 8,
'sequence_id': None,
'partition00': None,
'validity': b'\x01',
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
'v_rowids': OrderedDict({
'sql': 'select * from v_rowids',
'rows': list([
OrderedDict({
'rowid': 1,
'id': None,
'chunk_id': 1,
'chunk_offset': 0,
}),
]),
}),
'v_vector_chunks00': OrderedDict({
'sql': 'select * from v_vector_chunks00',
'rows': list([
OrderedDict({
'rowid': 1,
'vectors': b'\x11"3D\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
})
# ---
# name: test_updates[1. Initial dataset]
OrderedDict({
'sql': 'select * from v',
'rows': list([
OrderedDict({
'rowid': 1,
'p': 'a',
'a': b'\x11\x11\x11\x11',
}),
OrderedDict({
'rowid': 2,
'p': 'a',
'a': b'""""',
}),
OrderedDict({
'rowid': 3,
'p': 'a',
'a': b'3333',
}),
]),
})
# ---
# name: test_updates[2. update #1]
dict({
'error': 'OperationalError',
'message': 'UPDATE on partition key columns are not supported yet. ',
})
# ---

12
tests/conftest.py Normal file
View file

@ -0,0 +1,12 @@
import pytest
import sqlite3
@pytest.fixture()
def db():
    """Provide an in-memory SQLite connection with the vec0 extension loaded.

    Rows are returned as ``sqlite3.Row`` so tests can read columns by name.
    Extension loading is switched off again once ``dist/vec0`` has been
    loaded. The connection is closed when the requesting test finishes, so
    connections do not pile up over a test session.
    """
    connection = sqlite3.connect(":memory:")
    connection.row_factory = sqlite3.Row
    connection.enable_load_extension(True)
    connection.load_extension("dist/vec0")
    connection.enable_load_extension(False)
    # Everything after the yield is the fixture's teardown.
    yield connection
    connection.close()

9
tests/pyproject.toml Normal file
View file

@ -0,0 +1,9 @@
[project]
name = "tests"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"pytest", "numpy", "syrupy"
]

View file

@ -81,7 +81,7 @@ def connect(ext, path=":memory:", extra_entrypoint=None):
db = connect(EXT_PATH) db = connect(EXT_PATH)
def explain_query_plan(sql): def explain_query_plan(sql, db=db):
return db.execute("explain query plan " + sql).fetchone()["detail"] return db.execute("explain query plan " + sql).fetchone()["detail"]
@ -1497,6 +1497,13 @@ def test_vec0_text_pk():
] ]
if SUPPORTS_VTAB_IN: if SUPPORTS_VTAB_IN:
assert re.match(
("SCAN (TABLE )?t VIRTUAL TABLE INDEX 0:3{___}___\[___"),
explain_query_plan(
"select t_id, distance from t where aaa match '' and k = 3 and t_id in ('t_2', 't_3')",
db=db,
),
)
assert execute_all( assert execute_all(
db, db,
"select t_id, distance from t where aaa match ? and k = 3 and t_id in ('t_2', 't_3')", "select t_id, distance from t where aaa match ? and k = 3 and t_id in ('t_2', 't_3')",
@ -1939,20 +1946,6 @@ def test_vec0_create_errors():
db.execute("create virtual table t1 using vec0(a float[1])") db.execute("create virtual table t1 using vec0(a float[1])")
db.set_authorizer(None) db.set_authorizer(None)
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_INSERT, "t1_chunks"))
with _raises(
"Could not create create an initial chunk",
):
db.execute("create virtual table t1 using vec0(a float[1])")
db.set_authorizer(None)
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_INSERT, "t1_vector_chunks00"))
with _raises(
"Could not create create an initial chunk",
):
db.execute("create virtual table t1 using vec0(a float[1])")
db.set_authorizer(None)
# EVIDENCE-OF: V21406_05476 vec0 init raises error on 'latest chunk' init error # EVIDENCE-OF: V21406_05476 vec0 init raises error on 'latest chunk' init error
db.execute("BEGIN") db.execute("BEGIN")
db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_READ, "t1_chunks", "")) db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_READ, "t1_chunks", ""))
@ -2231,32 +2224,34 @@ def test_smoke():
}, },
] ]
chunk = db.execute("select * from vec_xyz_chunks").fetchone() chunk = db.execute("select * from vec_xyz_chunks").fetchone()
assert chunk["chunk_id"] == 1 # as of TODO, no initial row is inside the chunks table
assert chunk["validity"] == bytearray(int(1024 / 8)) assert chunk is None
assert chunk["rowids"] == bytearray(int(1024 * 8)) # assert chunk["chunk_id"] == 1
vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone() # assert chunk["validity"] == bytearray(int(1024 / 8))
assert vchunk["rowid"] == 1 # assert chunk["rowids"] == bytearray(int(1024 * 8))
assert vchunk["vectors"] == bytearray(int(1024 * 4 * 2)) # vchunk = db.execute("select * from vec_xyz_vector_chunks00").fetchone()
# assert vchunk["rowid"] == 1
# assert vchunk["vectors"] == bytearray(int(1024 * 4 * 2))
assert re.match( assert re.match(
"SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:knn:", "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:3{___}___",
explain_query_plan( explain_query_plan(
"select * from vec_xyz where a match X'' and k = 10 order by distance" "select * from vec_xyz where a match X'' and k = 10 order by distance"
), ),
) )
if SUPPORTS_VTAB_LIMIT: if SUPPORTS_VTAB_LIMIT:
assert re.match( assert re.match(
"SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:knn:", "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:3{___}___",
explain_query_plan( explain_query_plan(
"select * from vec_xyz where a match X'' order by distance limit 10" "select * from vec_xyz where a match X'' order by distance limit 10"
), ),
) )
assert re.match( assert re.match(
"SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:fullscan", "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 0:1",
explain_query_plan("select * from vec_xyz"), explain_query_plan("select * from vec_xyz"),
) )
assert re.match( assert re.match(
"SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 3:point", "SCAN (TABLE )?vec_xyz VIRTUAL TABLE INDEX 3:2",
explain_query_plan("select * from vec_xyz where rowid = 4"), explain_query_plan("select * from vec_xyz where rowid = 4"),
) )

View file

@ -0,0 +1,115 @@
import sqlite3
from collections import OrderedDict
def test_constructor_limit(db, snapshot):
    """Declare five partition key columns and snapshot the constructor's response."""
    create_sql = """
        create virtual table v using vec0(
          p1 int partition key,
          p2 int partition key,
          p3 int partition key,
          p4 int partition key,
          p5 int partition key,
          v float[1]
        )
    """
    outcome = exec(db, create_sql)
    assert outcome == snapshot(name="max 4 partition keys")
def test_normal(db, snapshot):
    """Insert rows one at a time and snapshot the shadow tables after each insert."""
    db.execute(
        "create virtual table v using vec0(p1 int partition key, a float[1], chunk_size=8)"
    )
    # Each step pairs an INSERT with the name of the snapshot taken right after it.
    steps = (
        (
            "insert into v(rowid, p1, a) values (1, 100, X'11223344')",
            "1 row",
        ),
        (
            "insert into v(rowid, p1, a) values (2, 100, X'44556677')",
            "2 rows, same parition",
        ),
        (
            "insert into v(rowid, p1, a) values (3, 200, X'8899aabb')",
            "3 rows, 2 partitions",
        ),
    )
    for insert_sql, snapshot_name in steps:
        db.execute(insert_sql)
        assert vec0_shadow_table_contents(db, "v") == snapshot(name=snapshot_name)
def test_types(db, snapshot):
    """Snapshot how an int partition key column reacts to a text value and to NULL."""
    insert_sql = "insert into v(p1, a) values(?, ?)"
    vector = b"\x11\x22\x33\x44"

    db.execute(
        "create virtual table v using vec0(p1 int partition key, a float[1], chunk_size=8)"
    )

    # EVIDENCE-OF: V11454_28292
    text_outcome = exec(db, insert_sql, ["not int", vector])
    assert text_outcome == snapshot(name="1. raises type error")

    tables_after_text = vec0_shadow_table_contents(db, "v")
    assert tables_after_text == snapshot(name="2. empty DB")

    # but allow NULLs
    null_outcome = exec(db, insert_sql, [None, vector])
    assert null_outcome == snapshot(name="3. allow nulls")

    tables_after_null = vec0_shadow_table_contents(db, "v")
    assert tables_after_null == snapshot(name="4. show NULL partition key")
def test_updates(db, snapshot):
    """Seed three rows in one partition, then snapshot an UPDATE of the partition key."""
    db.execute(
        "create virtual table v using vec0(p text partition key, a float[1], chunk_size=8)"
    )

    seed_rows = (
        (1, b"\x11\x11\x11\x11"),
        (2, b"\x22\x22\x22\x22"),
        (3, b"\x33\x33\x33\x33"),
    )
    for rowid, vector in seed_rows:
        db.execute(
            "insert into v(rowid, p, a) values (?, ?, ?)", [rowid, "a", vector]
        )

    initial = exec(db, "select * from v")
    assert initial == snapshot(name="1. Initial dataset")

    update_outcome = exec(db, "update v set p = ? where rowid = ?", ["new", 1])
    assert update_outcome == snapshot(name="2. update #1")
class Row:
    """Bare attribute container; instances start out with no attributes."""

    def __init__(self):
        pass

    def __repr__(self) -> str:
        # A bare `repr()` call raises TypeError (it requires one argument), so
        # describe the instance through its attribute dictionary instead.
        return repr(self.__dict__)
def exec(db, sql, parameters=()):
    """Run ``sql`` on ``db`` and return a snapshot-friendly description of the outcome.

    Note that this helper shadows the ``exec`` builtin within this module.

    Args:
        db: a sqlite3 connection whose rows expose ``keys()`` (for example one
            with ``row_factory = sqlite3.Row``).
        sql: the statement to execute.
        parameters: values bound to the statement's placeholders. The default
            is an immutable empty tuple rather than a shared mutable list.

    Returns:
        On success, an ``OrderedDict`` with ``"sql"`` (the statement text) and
        ``"rows"`` (one ``OrderedDict`` per row, in column order). If SQLite
        raises a ``DatabaseError``, a plain dict with ``"error"`` (the
        exception class name) and ``"message"`` (its text).
    """
    try:
        rows = db.execute(sql, parameters).fetchall()
    except sqlite3.DatabaseError as e:
        # OperationalError is a subclass of DatabaseError, so this one clause
        # covers both exception types.
        return {
            "error": e.__class__.__name__,
            "message": str(e),
        }

    result = OrderedDict()
    result["sql"] = sql
    result["rows"] = [
        OrderedDict((key, row[key]) for key in row.keys()) for row in rows
    ]
    return result
def vec0_shadow_table_contents(db, v):
    """Dump every ``sqlite_master`` entry whose name matches ``{v}_%``.

    Returns a dict mapping each matching name (visited in name order) to the
    result of ``exec(db, "select * from <name>")``.
    """
    name_rows = db.execute(
        "select name from sqlite_master where name like ? order by 1", [f"{v}_%"]
    ).fetchall()

    contents = {}
    for name_row in name_rows:
        table_name = name_row[0]
        contents[table_name] = exec(db, f"select * from {table_name}")
    return contents

54
tests/test-unit.c Normal file
View file

@ -0,0 +1,54 @@
#include "../sqlite-vec.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#define countof(x) (sizeof(x) / sizeof((x)[0]))
/*
 * Table-driven test for vec0_parse_partition_key_definition().
 *
 * Each case supplies a column definition string and the expected return code.
 * For cases expected to return SQLITE_OK, the parsed column name (compared
 * over the reported length) and the column type are checked as well.
 */
void test_vec0_parse_partition_key_definition() {
  printf("Starting %s...\n", __func__);

  typedef struct {
    char *test;
    int expected_rc;
    const char *expected_column_name;
    int expected_column_type;
  } TestCase;

  TestCase suite[] = {
      {"user_id integer partition key", SQLITE_OK, "user_id", SQLITE_INTEGER},
      {"USER_id int partition key", SQLITE_OK, "USER_id", SQLITE_INTEGER},
      {"category text partition key", SQLITE_OK, "category", SQLITE_TEXT},
      {"", SQLITE_EMPTY, "", 0},
      {"document_id text primary key", SQLITE_EMPTY, "", 0},
      {"document_id text partition keyy", SQLITE_EMPTY, "", 0},
  };

  /* countof() yields a size_t, so the index is unsigned as well. */
  for (size_t i = 0; i < countof(suite); i++) {
    /* Initialised so a failing parse never leaves indeterminate values. */
    char *out_column_name = NULL;
    int out_column_name_length = 0;
    int out_column_type = 0;
    int rc;

    rc = vec0_parse_partition_key_definition(
        suite[i].test,
        strlen(suite[i].test),
        &out_column_name,
        &out_column_name_length,
        &out_column_type);

    assert(rc == suite[i].expected_rc);
    if (rc == SQLITE_OK) {
      /* The parser reports an int length; cast it to compare with strlen(). */
      assert((size_t)out_column_name_length ==
             strlen(suite[i].expected_column_name));
      assert(strncmp(out_column_name, suite[i].expected_column_name,
                     (size_t)out_column_name_length) == 0);
      assert(out_column_type == suite[i].expected_column_type);
    }
    printf("✅ %s\n", suite[i].test);
  }
}
/* Entry point: print a banner, then run each unit test in turn. */
int main() {
  puts("Starting unit tests...");
  test_vec0_parse_partition_key_definition();
  return 0;
}

120
tests/uv.lock generated Normal file
View file

@ -0,0 +1,120 @@
version = 1
requires-python = ">=3.12"
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
]
[[package]]
name = "iniconfig"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 },
]
[[package]]
name = "numpy"
version = "2.1.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/25/ca/1166b75c21abd1da445b97bf1fa2f14f423c6cfb4fc7c4ef31dccf9f6a94/numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761", size = 20166090 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8a/f0/385eb9970309643cbca4fc6eebc8bb16e560de129c91258dfaa18498da8b/numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e", size = 20849658 },
{ url = "https://files.pythonhosted.org/packages/54/4a/765b4607f0fecbb239638d610d04ec0a0ded9b4951c56dc68cef79026abf/numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958", size = 13492258 },
{ url = "https://files.pythonhosted.org/packages/bd/a7/2332679479c70b68dccbf4a8eb9c9b5ee383164b161bee9284ac141fbd33/numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8", size = 5090249 },
{ url = "https://files.pythonhosted.org/packages/c1/67/4aa00316b3b981a822c7a239d3a8135be2a6945d1fd11d0efb25d361711a/numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564", size = 6621704 },
{ url = "https://files.pythonhosted.org/packages/5e/da/1a429ae58b3b6c364eeec93bf044c532f2ff7b48a52e41050896cf15d5b1/numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512", size = 13606089 },
{ url = "https://files.pythonhosted.org/packages/9e/3e/3757f304c704f2f0294a6b8340fcf2be244038be07da4cccf390fa678a9f/numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b", size = 16043185 },
{ url = "https://files.pythonhosted.org/packages/43/97/75329c28fea3113d00c8d2daf9bc5828d58d78ed661d8e05e234f86f0f6d/numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc", size = 16410751 },
{ url = "https://files.pythonhosted.org/packages/ad/7a/442965e98b34e0ae9da319f075b387bcb9a1e0658276cc63adb8c9686f7b/numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0", size = 14082705 },
{ url = "https://files.pythonhosted.org/packages/ac/b6/26108cf2cfa5c7e03fb969b595c93131eab4a399762b51ce9ebec2332e80/numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9", size = 6239077 },
{ url = "https://files.pythonhosted.org/packages/a6/84/fa11dad3404b7634aaab50733581ce11e5350383311ea7a7010f464c0170/numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a", size = 12566858 },
{ url = "https://files.pythonhosted.org/packages/4d/0b/620591441457e25f3404c8057eb924d04f161244cb8a3680d529419aa86e/numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f", size = 20836263 },
{ url = "https://files.pythonhosted.org/packages/45/e1/210b2d8b31ce9119145433e6ea78046e30771de3fe353f313b2778142f34/numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598", size = 13507771 },
{ url = "https://files.pythonhosted.org/packages/55/44/aa9ee3caee02fa5a45f2c3b95cafe59c44e4b278fbbf895a93e88b308555/numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57", size = 5075805 },
{ url = "https://files.pythonhosted.org/packages/78/d6/61de6e7e31915ba4d87bbe1ae859e83e6582ea14c6add07c8f7eefd8488f/numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe", size = 6608380 },
{ url = "https://files.pythonhosted.org/packages/3e/46/48bdf9b7241e317e6cf94276fe11ba673c06d1fdf115d8b4ebf616affd1a/numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43", size = 13602451 },
{ url = "https://files.pythonhosted.org/packages/70/50/73f9a5aa0810cdccda9c1d20be3cbe4a4d6ea6bfd6931464a44c95eef731/numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56", size = 16039822 },
{ url = "https://files.pythonhosted.org/packages/ad/cd/098bc1d5a5bc5307cfc65ee9369d0ca658ed88fbd7307b0d49fab6ca5fa5/numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a", size = 16411822 },
{ url = "https://files.pythonhosted.org/packages/83/a2/7d4467a2a6d984549053b37945620209e702cf96a8bc658bc04bba13c9e2/numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef", size = 14079598 },
{ url = "https://files.pythonhosted.org/packages/e9/6a/d64514dcecb2ee70bfdfad10c42b76cab657e7ee31944ff7a600f141d9e9/numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f", size = 6236021 },
{ url = "https://files.pythonhosted.org/packages/bb/f9/12297ed8d8301a401e7d8eb6b418d32547f1d700ed3c038d325a605421a4/numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed", size = 12560405 },
{ url = "https://files.pythonhosted.org/packages/a7/45/7f9244cd792e163b334e3a7f02dff1239d2890b6f37ebf9e82cbe17debc0/numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f", size = 20859062 },
{ url = "https://files.pythonhosted.org/packages/b1/b4/a084218e7e92b506d634105b13e27a3a6645312b93e1c699cc9025adb0e1/numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4", size = 13515839 },
{ url = "https://files.pythonhosted.org/packages/27/45/58ed3f88028dcf80e6ea580311dc3edefdd94248f5770deb980500ef85dd/numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e", size = 5116031 },
{ url = "https://files.pythonhosted.org/packages/37/a8/eb689432eb977d83229094b58b0f53249d2209742f7de529c49d61a124a0/numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0", size = 6629977 },
{ url = "https://files.pythonhosted.org/packages/42/a3/5355ad51ac73c23334c7caaed01adadfda49544f646fcbfbb4331deb267b/numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408", size = 13575951 },
{ url = "https://files.pythonhosted.org/packages/c4/70/ea9646d203104e647988cb7d7279f135257a6b7e3354ea6c56f8bafdb095/numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6", size = 16022655 },
{ url = "https://files.pythonhosted.org/packages/14/ce/7fc0612903e91ff9d0b3f2eda4e18ef9904814afcae5b0f08edb7f637883/numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f", size = 16399902 },
{ url = "https://files.pythonhosted.org/packages/ef/62/1d3204313357591c913c32132a28f09a26357e33ea3c4e2fe81269e0dca1/numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17", size = 14067180 },
{ url = "https://files.pythonhosted.org/packages/24/d7/78a40ed1d80e23a774cb8a34ae8a9493ba1b4271dde96e56ccdbab1620ef/numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48", size = 6291907 },
{ url = "https://files.pythonhosted.org/packages/86/09/a5ab407bd7f5f5599e6a9261f964ace03a73e7c6928de906981c31c38082/numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4", size = 12644098 },
]
[[package]]
name = "packaging"
version = "24.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 },
]
[[package]]
name = "pluggy"
version = "1.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
]
[[package]]
name = "pytest"
version = "8.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8b/6c/62bbd536103af674e227c41a8f3dcd022d591f6eed5facb5a0f31ee33bbc/pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", size = 1442487 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6b/77/7440a06a8ead44c7757a64362dd22df5760f9b12dc5f11b6188cd2fc27a0/pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2", size = 342341 },
]
[[package]]
name = "syrupy"
version = "4.7.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/67/81/f46d234fa4ca0edcdeed973bab9acd8f8ac186537cdc850e9e84a00f61a0/syrupy-4.7.2.tar.gz", hash = "sha256:ea45e099f242de1bb53018c238f408a5bb6c82007bc687aefcbeaa0e1c2e935a", size = 49320 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b9/75/57b629fdd256efc58fb045618d603ce0b0f5fcc477f34b758e34423efb99/syrupy-4.7.2-py3-none-any.whl", hash = "sha256:eae7ba6be5aed190237caa93be288e97ca1eec5ca58760e4818972a10c4acc64", size = 49234 },
]
[[package]]
name = "tests"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "numpy" },
{ name = "pytest" },
{ name = "syrupy" },
]
[package.metadata]
requires-dist = [
{ name = "numpy" },
{ name = "pytest" },
{ name = "syrupy" },
]