mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
doc updates
This commit is contained in:
parent
df48ac2416
commit
b62f6f19a8
31 changed files with 751 additions and 97 deletions
90
examples/python-recipes/openai-sample.py
Normal file
90
examples/python-recipes/openai-sample.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# pip install openai sqlite-vec
|
||||
|
||||
from openai import OpenAI
|
||||
import sqlite3
|
||||
import sqlite_vec
|
||||
import struct
|
||||
from typing import List
|
||||
|
||||
|
||||
def serialize(vector: List[float]) -> bytes:
|
||||
"""serializes a list of floats into a compact "raw bytes" format"""
|
||||
return struct.pack("%sf" % len(vector), *vector)
|
||||
|
||||
|
||||
sentences = [
|
||||
"Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.",
|
||||
"George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.",
|
||||
"Alaqua Cox is a Native American (Menominee) actress.",
|
||||
"Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.",
|
||||
"Tamarindo, also commonly known as agua de tamarindo, is a non-alcoholic beverage made of tamarind, sugar, and water.",
|
||||
]
|
||||
|
||||
|
||||
client = OpenAI()
|
||||
|
||||
# change ':memory:' to a filepath to persist data
|
||||
db = sqlite3.connect(":memory:")
|
||||
db.enable_load_extension(True)
|
||||
sqlite_vec.load(db)
|
||||
db.enable_load_extension(False)
|
||||
|
||||
db.execute(
|
||||
"""
|
||||
CREATE TABLE sentences(
|
||||
id INTEGER PRIMARY KEY,
|
||||
sentence TEXT
|
||||
);
|
||||
"""
|
||||
)
|
||||
|
||||
with db:
|
||||
for i, sentence in enumerate(sentences):
|
||||
db.execute("INSERT INTO sentences(id, sentence) VALUES(?, ?)", [i, sentence])
|
||||
|
||||
db.execute(
|
||||
"""
|
||||
CREATE VIRTUAL TABLE vec_sentences USING vec0(
|
||||
id INTEGER PRIMARY KEY,
|
||||
sentence_embedding FLOAT[1536]
|
||||
);
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
with db:
|
||||
sentence_rows = db.execute("SELECT id, sentence FROM sentences").fetchall()
|
||||
response = client.embeddings.create(
|
||||
input=[row[1] for row in sentence_rows], model="text-embedding-3-small"
|
||||
)
|
||||
for (id, _), embedding in zip(sentence_rows, response.data):
|
||||
db.execute(
|
||||
"INSERT INTO vec_sentences(id, sentence_embedding) VALUES(?, ?)",
|
||||
[id, serialize(embedding.embedding)],
|
||||
)
|
||||
|
||||
|
||||
query = "fruity liquids"
|
||||
query_embedding = (
|
||||
client.embeddings.create(input=query, model="text-embedding-3-small")
|
||||
.data[0]
|
||||
.embedding
|
||||
)
|
||||
|
||||
results = db.execute(
|
||||
"""
|
||||
SELECT
|
||||
vec_sentences.id,
|
||||
distance,
|
||||
sentence
|
||||
FROM vec_sentences
|
||||
LEFT JOIN sentences ON sentences.id = vec_sentences.id
|
||||
WHERE sentence_embedding MATCH ?
|
||||
AND k = 3
|
||||
ORDER BY distance
|
||||
""",
|
||||
[serialize(query_embedding)],
|
||||
).fetchall()
|
||||
|
||||
for row in results:
|
||||
print(row)
|
||||
Loading…
Add table
Add a link
Reference in a new issue