knn cleanups and tests

This commit is contained in:
Alex Garcia 2024-07-05 12:07:45 -07:00
parent b1e7a93a11
commit f217cbf2bd
8 changed files with 1328 additions and 398 deletions

1
.gitignore vendored
View file

@ -3,6 +3,7 @@
sift/
*.tar.gz
*.db
*.npy
*.bin
*.out
venv/

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,16 @@
import numpy as np
import duckdb

# Export the embedding column of the dbpedia-openai parquet shard (fetched
# straight from Hugging Face by duckdb's hf:// protocol) into a local .npy
# file for the KNN benchmarks.
connection = duckdb.connect(":memory:")
embeddings = connection.execute(
    """
select
-- _id,
-- title,
-- text as contents,
embedding::float[] as embeddings
from "hf://datasets/Supabase/dbpedia-openai-3-large-1M/dbpedia_openai_3_large_00.parquet"
"""
).fetchnumpy()["embeddings"]
# fetchnumpy() yields an object array of per-row vectors; vstack makes it 2-D.
np.save("dbpedia_openai_3_large_00.npy", np.vstack(embeddings))

View file

@ -0,0 +1,124 @@
import numpy as np
import numpy.typing as npt
import time
import tqdm
import pytest
def cosine_similarity(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32], do_norm: bool = True
) -> npt.NDArray[np.float32]:
    """Cosine similarity between `vec` and every row of `mat`.

    With do_norm=False the raw dot products are returned instead
    (useful when the inputs are already unit-normalized).
    """
    scores = mat @ vec
    if not do_norm:
        return scores
    return scores / (np.linalg.norm(vec) * np.linalg.norm(mat, axis=1))
def distance_l2(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32]
) -> npt.NDArray[np.float32]:
    """Return the L2 (Euclidean) distance between `vec` and every row of `mat`."""
    return np.sqrt(np.sum((mat - vec) ** 2, axis=1))


def topk(
    vec: npt.NDArray[np.float32],
    mat: npt.NDArray[np.float32],
    k: int = 5,
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.float32]]:
    """Return (indices, distances) of the `k` rows of `mat` closest to `vec`,
    sorted by ascending L2 distance.

    `k` is clamped to the number of rows: np.argpartition requires kth < n,
    so k >= n falls back to a plain full sort instead of raising ValueError.
    """
    distances = distance_l2(vec, mat)
    n = distances.shape[0]
    if k >= n:
        # argpartition's kth must be < n; a full sort is equivalent here.
        order = np.argsort(distances)
        return order, distances[order]
    # Rather than sorting all similarities and taking the top K, it's faster to
    # argpartition and then just sort the top K.
    # The difference is O(N logN) vs O(N + k logk)
    indices = np.argpartition(distances, kth=k)[:k]
    top_indices = indices[np.argsort(distances[indices])]
    return top_indices, distances[top_indices]
# Quick smoke test of topk() on a tiny 3x3 matrix before the real benchmark.
vec = np.array([1.0, 2.0, 3.0], dtype=np.float32)
mat = np.array([
    [4.0, 5.0, 6.0],
    [1.0, 2.0, 1.0],
    [7.0, 8.0, 9.0]
], dtype=np.float32)
# Two nearest rows to `vec` by L2 distance, printed for eyeballing.
indices, distances = topk(vec, mat, k=2)
print(indices)
print(distances)
import sqlite3
import json

# Load the sqlite-vec extension twice: the default entrypoint, then the
# optional fs-read entrypoint (enables vec_npy_file/vec_npy_each below).
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
db.load_extension("../../dist/vec0")
db.execute("select load_extension('../../dist/vec0', 'sqlite3_vec_fs_read_init')")
db.enable_load_extension(False)

# Reference KNN query: brute force with vec_distance_l2() over the demo
# matrix serialized as JSON, ordered by distance. Should agree with topk().
results = db.execute(
    '''
select
key,
--value,
vec_distance_l2(:q, value) as distance
from json_each(:base)
order by distance
limit 2
''',
    {
        'base': json.dumps(mat.tolist()),
        'q': '[1.0, 2.0, 3.0]'
    }).fetchall()
a = [row[0] for row in results]  # json_each keys (row indices)
b = [row[1] for row in results]  # L2 distances
print(a)
print(b)
#import sys; sys.exit()

# Bigger pages let vec0's shadow tables keep larger chunks per page.
db.execute('PRAGMA page_size=16384')

print("Loading into sqlite-vec vec0 table...")
t0 = time.time()
db.execute("create virtual table v using vec0(a float[3072], chunk_size=16)")
# vec_npy_each() streams vectors directly out of the .npy file on disk.
db.execute('insert into v select rowid, vector from vec_npy_each(vec_npy_file("dbpedia_openai_3_large_00.npy"))')
print(time.time() - t0)

print("loading numpy array...")
t0 = time.time()
base = np.load('dbpedia_openai_3_large_00.npy')
print(time.time() - t0)

# Sample 20 fixed queries from the base set (seeded for reproducibility).
np.random.seed(1)
queries = base[np.random.choice(base.shape[0], 20, replace=False), :]

# Per-query wall-clock timings for the numpy and sqlite-vec paths.
np_durations = []
vec_durations = []
from random import randrange
def test_all():
    """Compare vec0 KNN against the numpy brute-force reference.

    For each sampled query, run topk() over `base` and the equivalent
    `MATCH ... k = ?` query against the vec0 table, timing both, and
    assert the returned distances agree exactly. Prints mean timings.
    """
    for idx, query in tqdm.tqdm(enumerate(queries)):
        #k = randrange(20, 1000)
        #k = 500
        k = 10

        t0 = time.time()
        np_ids, np_distances = topk(query, base, k=k)
        np_durations.append(time.time() - t0)

        t0 = time.time()
        rows = db.execute('select rowid, distance from v where a match ? and k = ?', [query, k]).fetchall()
        vec_durations.append(time.time() - t0)
        vec_ids = [row[0] for row in rows]
        vec_distances = [row[1] for row in rows]

        # Distances must match exactly; rowid order can differ on ties,
        # so the id comparison below is left disabled.
        assert vec_distances == np_distances.tolist()
        #assert vec_ids == np_ids.tolist()
        #if (vec_ids != np_ids).any():
        #    print('idx', idx)
        #    print('query', query)
        #    print('np_ids', np_ids)
        #    print('np_distances', np_distances)
        #    print('vec_ids', vec_ids)
        #    print('vec_distances', vec_distances)
        #    raise Exception(idx)
    print('final', 'np' ,np.mean(np_durations), 'vec', np.mean(vec_durations))

View file

@ -0,0 +1,16 @@
.load dist/vec0
.mode box
.header on
.eqp on
.echo on

select sqlite_version(), vec_version();

-- Expand a single JSON vector into one row per element.
select * from vec_each('[1,2,3]');

-- vec_each() applied per row of a JSON array of vectors (lateral join).
select *
from json_each('[
[1,2,3,4],
[1,2,3,4]
]')
join vec_each(json_each.value);

View file

@ -0,0 +1,61 @@
.load dist/vec0
.mode box
.header on
.eqp on
.echo on

select sqlite_version(), vec_version();

-- 100 one-dimensional vectors with small chunks, to exercise the
-- chunk-traversal path of vec0's KNN plan.
create virtual table v using vec0(a float[1], chunk_size=8);

insert into v
select value, format('[%f]', value / 100.0)
from generate_series(1, 100);

-- Basic KNN: two nearest neighbors of [.3].
select
rowid,
vec_to_json(a)
from v
where a match '[.3]'
and k = 2;

-- k = 0 must return no rows.
select
rowid,
vec_to_json(a)
from v
where a match '[.3]'
and k = 0;

-- KNN constrained by a rowid IN (...) pre-filter.
select
rowid,
vec_to_json(a)
from v
where a match '[2.0]'
and k = 2
and rowid in (1,2,3,4,5);

-- Batched KNN: one query per row of the `queries` CTE.
with queries as (
select
rowid as query_id,
json_array(value / 100.0) as value
from generate_series(24, 39)
)
select
query_id,
rowid,
distance,
vec_to_json(a)
from queries, v
where a match queries.value
and k =5;

-- Point lookups by rowid (non-KNN full-scan path).
select *
from v
where rowid in (1,2,3,4);

drop table v;

View file

@ -113,11 +113,33 @@ FUNCTIONS = [
"vec_quantize_i8",
"vec_quantize_i8",
"vec_slice",
"vec_static_blob_from_raw",
"vec_sub",
"vec_to_json",
"vec_version",
]
MODULES = ["vec0", "vec_each", "vec_npy_each"]
# Virtual table modules the extension is expected to register.
MODULES = ["vec0", "vec_each", "vec_npy_each", "vec_static_blob_entries", "vec_static_blobs"]
@pytest.mark.skip(reason="TODO")
def test_vec_static_blob_from_raw():
    """Placeholder: cover the vec_static_blob_from_raw() function."""


@pytest.mark.skip(reason="TODO")
def test_vec_static_blobs():
    """Placeholder: cover the vec_static_blobs virtual table."""


@pytest.mark.skip(reason="TODO")
def test_vec_static_blob_entries():
    """Placeholder: cover the vec_static_blob_entries virtual table."""
def test_funcs():
@ -420,6 +442,11 @@ def test_vec_slice():
):
vec_slice(b"\xab\xab\xab\xab", 1, 0)
with _raises(
"slice 'start' index is equal to the 'end' index, vectors must have non-zero length"
):
vec_slice(b"\xab\xab\xab\xab", 0, 0)
def test_vec_add():
vec_add = lambda *args, a="?", b="?": db.execute(
@ -775,6 +802,7 @@ def test_vec0_drops():
"t1_vector_chunks00",
"t1_vector_chunks01",
]
db.execute("drop table t1")
assert [
row["name"]
@ -1175,7 +1203,8 @@ def test_vec0_text_pk():
create virtual table t using vec0(
t_id text primary key,
aaa float[1],
bbb float8[1]
bbb float8[1],
chunk_size=8
);
"""
)
@ -1226,6 +1255,39 @@ def test_vec0_text_pk():
db.execute("select * from t")
db.set_authorizer(None)
assert execute_all(
db, "select t_id, distance from t where aaa match ? and k = 3", ["[.01]"]
) == [
{
"t_id": "t_1",
"distance": 0.09000000357627869,
},
{
"t_id": "t_2",
"distance": 0.1899999976158142,
},
{
"t_id": "t_3",
"distance": 0.2900000214576721,
},
]
if SUPPORTS_VTAB_IN:
assert execute_all(
db,
"select t_id, distance from t where aaa match ? and k = 3 and t_id in ('t_2', 't_3')",
["[.01]"],
) == [
{
"t_id": "t_2",
"distance": 0.1899999976158142,
},
{
"t_id": "t_3",
"distance": 0.2900000214576721,
},
]
def test_vec0_best_index():
db = connect(EXT_PATH)
@ -1679,6 +1741,214 @@ def test_vec0_create_errors():
db.set_authorizer(None)
def test_vec0_knn():
    """KNN (`MATCH ... k = ?`) behavior of vec0: query-vector validation,
    k validation, empty tables, inserts/deletes, and detection of corrupted
    shadow-table blobs."""
    db = connect(EXT_PATH)
    db.execute(
        """
        create virtual table v using vec0(
          aaa float[8],
          bbb int8[8],
          ccc bit[8],
          chunk_size=8
        );
    """
    )

    # Query-vector validation: NULL, wrong vector type, wrong dimensions.
    with _raises(
        'Query vector on the "aaa" column is invalid: Input must have type BLOB (compact format) or TEXT (JSON), found NULL'
    ):
        db.execute("select * from v where aaa match NULL and k = 10")
    with _raises(
        'Query vector for the "aaa" column is expected to be of type float32, but a bit vector was provided.'
    ):
        db.execute("select * from v where aaa match vec_bit(X'AA') and k = 10")
    with _raises(
        'Dimension mismatch for inserted vector for the "aaa" column. Expected 8 dimensions but received 1.'
    ):
        db.execute("select * from v where aaa match vec_f32('[.1]') and k = 10")

    # 8-dim query vector used throughout the rest of the test.
    qaaa = json.dumps([0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01])

    # k validation: negative raises, zero yields an empty result set.
    with _raises("k value in knn queries must be greater than or equal to 0."):
        db.execute("select * from v where aaa match vec_f32(?) and k = -1", [qaaa])
    assert (
        execute_all(db, "select * from v where aaa match vec_f32(?) and k = 0", [qaaa])
        == []
    )

    # Denying reads on the chunks shadow table must surface as a prepare error.
    # EVIDENCE-OF: V06942_23781
    db.set_authorizer(authorizer_deny_on(sqlite3.SQLITE_READ, "v_chunks", "chunk_id"))
    with _raises(
        "Error preparing stmtChunk: access to v_chunks.chunk_id is prohibited",
        sqlite3.DatabaseError,
    ):
        db.execute("select * from v where aaa match vec_f32(?) and k = 5", [qaaa])
    db.set_authorizer(None)

    # KNN on an empty table returns no rows.
    assert (
        execute_all(db, "select * from v where aaa match vec_f32(?) and k = 5", [qaaa])
        == []
    )

    # Insert 24 rows (3 chunks at chunk_size=8); row i's vector is [i*0.01]*8,
    # with derived int8 and bit columns.
    db.executemany(
        """
        INSERT INTO v VALUES
        (:id, :vector, vec_quantize_i8(:vector, 'unit') ,vec_quantize_binary(:vector));
    """,
        [
            {
                "id": i,
                "vector": json.dumps(
                    [
                        i * 0.01,
                        i * 0.01,
                        i * 0.01,
                        i * 0.01,
                        i * 0.01,
                        i * 0.01,
                        i * 0.01,
                        i * 0.01,
                    ]
                ),
            }
            for i in range(24)
        ],
    )

    # Nearest 9 rows to qaaa (= [.01]*8): rows 1, 2|0 (tie), then 3..8.
    assert execute_all(
        db, "select rowid from v where aaa match vec_f32(?) and k = 9", [qaaa]
    ) == [
        {"rowid": 1},
        {"rowid": 2},  # ordering of 2 and 0 here depends on if min_idx uses < or <=
        {"rowid": 0},  #
        {"rowid": 3},
        {"rowid": 4},
        {"rowid": 5},
        {"rowid": 6},
        {"rowid": 7},
        {"rowid": 8},
    ]

    # Deleted rows must disappear from subsequent KNN results.
    # TODO separate test, DELETE FROM WHERE rowid in (...) is fullscan that calls vec0Rowid. try on text PKs
    db.execute("delete from v where rowid in (1, 0, 8, 9)")
    assert execute_all(
        db, "select rowid from v where aaa match vec_f32(?) and k = 9", [qaaa]
    ) == [
        {"rowid": 2},
        {"rowid": 3},
        {"rowid": 4},
        {"rowid": 5},
        {"rowid": 6},
        {"rowid": 7},
        {"rowid": 10},
        {"rowid": 11},
        {"rowid": 12},
    ]

    # Corrupt each shadow blob inside a transaction and confirm the KNN scan
    # detects the size mismatch; rollback restores the good data each time.
    # EVIDENCE-OF: V05271_22109 vec0 knn validates chunk size
    db.commit()
    db.execute("BEGIN")
    db.execute("update v_chunks set validity = zeroblob(100)")
    with _raises("chunk validity size doesn't match - expected 1, found 100"):
        db.execute("select * from v where aaa match ? and k = 2", [qaaa])
    db.rollback()

    # EVIDENCE-OF: V02796_19635 vec0 knn validates rowids size
    db.commit()
    db.execute("BEGIN")
    db.execute("update v_chunks set rowids = zeroblob(100)")
    with _raises("chunk rowids size doesn't match - expected 64, found 100"):
        db.execute("select * from v where aaa match ? and k = 2", [qaaa])
    db.rollback()

    # EVIDENCE-OF: V16465_00535 vec0 knn validates vector chunk size
    db.commit()
    db.execute("BEGIN")
    db.execute("update v_vector_chunks00 set vectors = zeroblob(100)")
    with _raises("vectors blob size doesn't match - expected 256, found 100"):
        db.execute("select * from v where aaa match ? and k = 2", [qaaa])
    db.rollback()
import numpy.typing as npt
def np_distance_l2(
    vec: npt.NDArray[np.float32], mat: npt.NDArray[np.float32]
) -> npt.NDArray[np.float32]:
    """Return the L2 (Euclidean) distance between `vec` and every row of `mat`."""
    return np.sqrt(np.sum((mat - vec) ** 2, axis=1))


def np_topk(
    vec: npt.NDArray[np.float32],
    mat: npt.NDArray[np.float32],
    k: int = 5,
) -> tuple[npt.NDArray[np.int32], npt.NDArray[np.float32]]:
    """Return (indices, distances) of the `k` rows of `mat` closest to `vec`,
    sorted by ascending L2 distance.

    `k` is clamped to the number of rows: np.argpartition requires kth < n,
    so k >= n falls back to a plain full sort instead of raising ValueError.
    """
    distances = np_distance_l2(vec, mat)
    n = distances.shape[0]
    if k >= n:
        # argpartition's kth must be < n; a full sort is equivalent here.
        order = np.argsort(distances)
        return order, distances[order]
    # Rather than sorting all similarities and taking the top K, it's faster to
    # argpartition and then just sort the top K.
    # The difference is O(N logN) vs O(N + k logk)
    indices = np.argpartition(distances, kth=k)[:k]
    top_indices = indices[np.argsort(distances[indices])]
    return top_indices, distances[top_indices]
# import faiss
@pytest.mark.skip(reason="TODO")
def test_correctness_npy():
    """Cross-check vec0 KNN against scalar vec_distance_l2, faiss, and numpy
    on 10k random 24-dim vectors.

    NOTE(review): `import faiss` is commented out at module level; it must be
    restored before un-skipping this test.
    """
    db = connect(EXT_PATH)
    np.random.seed(420 + 1 + 2)
    mat = np.random.uniform(low=-1.0, high=1.0, size=(10000, 24)).astype(np.float32)
    queries = np.random.uniform(low=-1.0, high=1.0, size=(1000, 24)).astype(np.float32)

    # sqlite-vec with vec0
    db.execute("create virtual table v using vec0(a float[24], chunk_size=8)")
    for v in mat:
        db.execute("insert into v(a) values (?)", [v])

    # sqlite-vec with scalar functions
    db.execute("create table t(a float[24])")
    for v in mat:
        db.execute("insert into t(a) values (?)", [v])

    faiss_index = faiss.IndexFlatL2(24)
    faiss_index.add(mat)

    k = 10000 - 1
    for idx, q in enumerate(queries):
        print(idx)
        result = execute_all(
            db,
            "select rowid - 1 as idx, distance from v where a match ? and k = ?",
            [q, k],
        )
        vec_vtab_rowids = [row["idx"] for row in result]
        vec_vtab_distances = [row["distance"] for row in result]

        result = execute_all(
            db,
            "select rowid - 1 as idx, vec_distance_l2(a, ?) as distance from t order by 2 limit ?",
            [q, k],
        )
        vec_scalar_rowids = [row["idx"] for row in result]
        vec_scalar_distances = [row["distance"] for row in result]

        assert vec_scalar_rowids == vec_vtab_rowids
        assert vec_scalar_distances == vec_vtab_distances

        # faiss IndexFlatL2 returns squared L2; sqrt to match vec_distance_l2.
        faiss_distances, faiss_rowids = faiss_index.search(np.array([q]), k)
        faiss_distances = np.sqrt(faiss_distances)
        assert faiss_rowids[0].tolist() == vec_scalar_rowids
        assert faiss_distances[0].tolist() == vec_scalar_distances
        assert faiss_distances[0].tolist() == vec_vtab_distances
        assert faiss_rowids[0].tolist() == vec_vtab_rowids

        # Fixed: np_topk's signature is (vec, mat, k) — the arguments were
        # previously passed swapped as np_topk(mat, q, k=k).
        np_rowids, np_distances = np_topk(q, mat, k=k)
        # assert vec_vtab_rowids == np_rowids.tolist()
        # assert vec_vtab_distances == np_distances.tolist()
def test_smoke():
db.execute("create virtual table vec_xyz using vec0( a float[2] )")
assert execute_all(
@ -1833,20 +2103,15 @@ def test_vec0_stress_small_chunks():
"distance": 0.0,
"rowid": 500,
},
{
"a": _f32([499 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 499,
},
{
"a": _f32([501 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 501,
},
{
"a": _f32([498 * 0.1] * 8),
"distance": 0.5656875967979431,
"rowid": 498,
"a": _f32([499 * 0.1] * 8),
"distance": 0.2828384041786194,
"rowid": 499,
},
{
"a": _f32([502 * 0.1] * 8),
@ -1854,15 +2119,20 @@ def test_vec0_stress_small_chunks():
"rowid": 502,
},
{
"a": _f32([497 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 497,
"a": _f32([498 * 0.1] * 8),
"distance": 0.5656875967979431,
"rowid": 498,
},
{
"a": _f32([503 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 503,
},
{
"a": _f32([497 * 0.1] * 8),
"distance": 0.8485260009765625,
"rowid": 497,
},
{
"a": _f32([496 * 0.1] * 8),
"distance": 1.1313751935958862,

View file

@ -17,9 +17,55 @@ fn _min_idx(distances: Vec<f32>, k: i32) -> Vec<i32> {
out
}
/// Rust wrapper over the C `merge_sorted_lists`: merge `a`/`a_rowids`
/// (already sorted ascending by distance) with `b`/`b_rowids`, whose
/// ascending traversal order is given by `b_top_idx`, keeping at most `n`
/// smallest entries. Returns `(rowids, distances)` sorted by distance.
fn _merge_sorted_lists(
    a: &Vec<f32>,
    a_rowids: &Vec<i64>,
    b: &Vec<f32>,
    b_rowids: &Vec<i64>,
    b_top_idx: &Vec<i32>,
    n: usize,
) -> (Vec<i64>, Vec<f32>) {
    let mut out_used: i64 = 0;
    // Capacity-only buffers: the C side fills up to `n` slots and reports
    // the count back through `out_used`.
    let mut out: Vec<f32> = Vec::with_capacity(n);
    let mut out_rowids: Vec<i64> = Vec::with_capacity(n);
    unsafe {
        merge_sorted_lists(
            a.as_ptr().cast(),
            a_rowids.as_ptr().cast(),
            a.len() as i64,
            b.as_ptr().cast(),
            b_rowids.as_ptr().cast(),
            b_top_idx.as_ptr().cast(),
            b.len() as i64,
            // The C callee writes through these buffers, so derive the
            // pointers with as_mut_ptr(): writing through a pointer derived
            // from as_ptr() (&self) is UB, even though the extern decl
            // takes *const. *mut coerces to *const at the call site.
            out.as_mut_ptr() as *const f32,
            out_rowids.as_mut_ptr() as *const i64,
            n as i64,
            &mut out_used,
        );
        // Adopt however many slots the C side reports as initialized.
        out.set_len(out_used as usize);
        out_rowids.set_len(out_used as usize);
    }
    (out_rowids, out)
}
// FFI bindings into the C core (static lib `sqlite-vec-internal`).
#[link(name = "sqlite-vec-internal")]
extern "C" {
    // Writes the indices of the k smallest `distances` into `out`.
    fn min_idx(distances: *const f32, n: i32, out: *mut i32, k: i32) -> i32;
    // Merges sorted list `a` with list `b` (traversed via `b_top_idx`) into
    // `out`/`out_rowids`, reporting the filled count through `out_used`.
    // NOTE(review): `out` and `out_rowids` are written by the C side, so
    // they should arguably be declared *mut here — confirm against the C
    // prototype before changing.
    fn merge_sorted_lists(
        a: *const f32,
        a_rowids: *const i64,
        a_length: i64,
        b: *const f32,
        b_rowids: *const i64,
        b_top_idx: *const i32,
        b_length: i64,
        out: *const f32,
        out_rowids: *const i64,
        out_length: i64,
        out_used: *mut i64,
    );
}
#[cfg(test)]
@ -34,4 +80,85 @@ mod tests {
assert_eq!(_min_idx(vec![1.0, 2.0, 3.0], 2), vec![0, 1]);
assert_eq!(_min_idx(vec![3.0, 2.0, 1.0], 2), vec![2, 1]);
}
#[test]
fn test_merge_sorted_lists() {
    // `a` is pre-sorted ascending; `b` is unsorted, with `b_top_idx` giving
    // the positions of b's values in ascending order (0.1, 0.2, 0.3, 0.4).
    let a = &vec![0.01, 0.02, 0.03];
    let a_rowids = &vec![1, 2, 3];
    //let b = &vec![0.1, 0.2, 0.3, 0.4];
    //let b_rowids = &vec![4, 5, 6, 7];
    let b = &vec![0.4, 0.2, 0.3, 0.1];
    let b_rowids = &vec![7, 5, 6, 4];
    let b_top_idx = &vec![3, 1, 2, 0];

    // Table of (n, expected rowids, expected distances); n = 8 exceeds the
    // combined length, so it truncates to the same result as n = 7.
    let cases: Vec<(usize, Vec<i64>, Vec<f32>)> = vec![
        (0, vec![], vec![]),
        (1, vec![1], vec![0.01]),
        (2, vec![1, 2], vec![0.01, 0.02]),
        (3, vec![1, 2, 3], vec![0.01, 0.02, 0.03]),
        (4, vec![1, 2, 3, 4], vec![0.01, 0.02, 0.03, 0.1]),
        (5, vec![1, 2, 3, 4, 5], vec![0.01, 0.02, 0.03, 0.1, 0.2]),
        (
            6,
            vec![1, 2, 3, 4, 5, 6],
            vec![0.01, 0.02, 0.03, 0.1, 0.2, 0.3],
        ),
        (
            7,
            vec![1, 2, 3, 4, 5, 6, 7],
            vec![0.01, 0.02, 0.03, 0.1, 0.2, 0.3, 0.4],
        ),
        (
            8,
            vec![1, 2, 3, 4, 5, 6, 7],
            vec![0.01, 0.02, 0.03, 0.1, 0.2, 0.3, 0.4],
        ),
    ];
    for (n, want_rowids, want_distances) in cases {
        assert_eq!(
            _merge_sorted_lists(a, a_rowids, b, b_rowids, b_top_idx, n),
            (want_rowids, want_distances),
            "n={}",
            n
        );
    }
}
/*
#[test]
fn test_merge_sorted_lists_empty() {
let x = vec![0.1, 0.2, 0.3];
let x_rowids = vec![666, 888, 777];
assert_eq!(
_merge_sorted_lists(&x, &x_rowids, &vec![], &vec![], 3),
(vec![666, 888, 777], vec![0.1, 0.2, 0.3])
);
assert_eq!(
_merge_sorted_lists(&vec![], &vec![], &x, &x_rowids, 3),
(vec![666, 888, 777], vec![0.1, 0.2, 0.3])
);
assert_eq!(
_merge_sorted_lists(&vec![], &vec![], &x, &x_rowids, 4),
(vec![666, 888, 777], vec![0.1, 0.2, 0.3])
);
assert_eq!(
_merge_sorted_lists(&vec![], &vec![], &x, &x_rowids, 2),
(vec![666, 888], vec![0.1, 0.2])
);
}*/
}