knn cleanups and tests

This commit is contained in:
Alex Garcia 2024-07-05 12:07:45 -07:00
parent b1e7a93a11
commit f217cbf2bd
8 changed files with 1328 additions and 398 deletions

View file

@ -0,0 +1,16 @@
import numpy as np
import duckdb
# Export the `embedding` column of the Parquet shard named in the query below
# into a local .npy file.

# The `--` lines are SQL comments, so only `embedding` is selected; it is cast
# to float[] and exposed under the alias `embeddings`.
QUERY = """
select
-- _id,
-- title,
-- text as contents,
embedding::float[] as embeddings
from "hf://datasets/Supabase/dbpedia-openai-3-large-1M/dbpedia_openai_3_large_00.parquet"
"""

# Use the connection as a context manager so it is closed as soon as the data
# has been pulled into NumPy, rather than staying open until interpreter exit.
with duckdb.connect(":memory:") as db:
    # fetchnumpy() returns a dict keyed by column name; keep the one column.
    result = db.execute(QUERY).fetchnumpy()['embeddings']
    # Stack the fetched per-row values into one array before the connection
    # goes away, so nothing below depends on the connection.
    embeddings = np.vstack(result)

np.save("dbpedia_openai_3_large_00.npy", embeddings)