sqlite-vec/examples/nbc-headlines/2_build.ipynb

560 lines
26 KiB
Text
Raw Normal View History

2024-09-07 09:22:21 -07:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[no code]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
".open tmp3.db"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"<th>\n",
"schema\n",
"</th>\n",
"<th>\n",
"name\n",
"</th>\n",
"<th>\n",
"type\n",
"</th>\n",
"<th>\n",
"ncol\n",
"</th>\n",
"<th>\n",
"wr\n",
"</th>\n",
"<th>\n",
"strict\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"main\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"sqlite_sequence\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"2\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"main\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"articles\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"9\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"main\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"sqlite_schema\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"5\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"temp\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"sqlite_temp_schema\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"5\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"4 rows × 6 columns\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┐\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mschema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mname \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtype \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mncol\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mwr\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mstrict\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┤\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_sequence \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticles \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_schema \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtemp \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_temp_schema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┘\n",
"\u001b[0m\u001b[0m"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Sanity check: list the tables visible on this connection before building indexes.\n",
"select *\n",
"from pragma_table_list;"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"0 row × 0 column\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Full-text index over articles.headline. This is an external-content FTS5\n",
"-- table: it stores only the index and reads the text back from `articles`,\n",
"-- matching FTS rowids against articles.id.\n",
"-- `if not exists` keeps the cell re-runnable against an existing tmp3.db.\n",
"create virtual table if not exists fts_headlines using fts5(\n",
"  headline,\n",
"  content='articles', content_rowid='id'\n",
");\n",
"\n",
"-- 'rebuild' discards the current index and re-reads every row of the content\n",
"-- table using the declared content_rowid, so the index always agrees with the\n",
"-- table definition and re-running never indexes a headline twice.\n",
"insert into fts_headlines(fts_headlines) values('rebuild');\n",
"\n",
"-- Merge the index segments into a single b-tree for faster queries.\n",
"insert into fts_headlines(fts_headlines) values('optimize');"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"<th>\n",
"headline\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Washington state faces first outbreak of a deadly fungal infection that&#39;s on the rise in the U.S.\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Israel-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"House to vote on an expanded child tax credit bill\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Travel costs, staff and ads added up before Ron DeSantis dropped out\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Victims of Hamas attack in Israel and their families blame Iran in new federal lawsuit\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Trump meets with Teamsters as he targets Biden support\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"The bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Machu Picchu tourism suffering after week of protests against new ticketing system\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"FCC moves to criminalize most AI-generated robocalls\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Civil rights group says N.C. public schools are harming LGBTQ students, violating federal law\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"10 rows × 1 column\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWashington state faces first outbreak of a deadly fungal infection that's on the rise in the U.S. \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIsrael-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHouse to vote on an expanded child tax credit bill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTravel costs, staff and ads added up before Ron DeSantis dropped out \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVictims of Hamas attack in Israel and their families blame Iran in new federal lawsuit \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump meets with Teamsters as he targets Biden support \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mThe bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMachu Picchu tourism suffering after week of protests against new ticketing system \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFCC moves to criminalize most AI-generated robocalls \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mCivil rights group says N.C. public schools are harming LGBTQ students, violating federal law \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n",
"\u001b[0m\u001b[0m"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Spot-check the full-text table by showing 10 of its rows.\n",
"select *\n",
"from fts_headlines\n",
"limit 10;"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"0 row × 0 column\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
".load ./lembed0\n",
".load ../../dist/vec0\n",
"\n",
"-- The two extensions loaded above provide the lembed_* and vec_* / vec0 names\n",
"-- used in the rest of this notebook. Register the local GGUF embedding model\n",
"-- under the name 'default'.\n",
"insert into lembed_models(name, model)\n",
"select\n",
"  'default',\n",
"  lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"-- One 384-dimensional float vector per article, keyed by the article's rowid.\n",
"-- `if not exists` keeps the cell re-runnable against an existing tmp3.db.\n",
"create virtual table if not exists vec_headlines using vec0(\n",
"  article_id integer primary key,\n",
"  headline_embedding float[384]\n",
");\n",
"\n",
"-- Embed each headline with lembed(); called with a single argument it is\n",
"-- expected to use the model registered as 'default' (see sqlite-lembed docs).\n",
"-- Articles that already have a vector are skipped, so a re-run only pays for\n",
"-- new rows instead of re-embedding everything and colliding on article_id.\n",
"insert into vec_headlines(article_id, headline_embedding)\n",
"select\n",
"  rowid,\n",
"  lembed(headline)\n",
"from articles\n",
"where rowid not in (select article_id from vec_headlines);"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"<th>\n",
"article_id\n",
"</th>\n",
"<th>\n",
"headline_embedding\n",
"</th>\n",
"<th>\n",
"vec_to_json(vec_slice(headline_embedding, 0, 8))\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td >\n",
"1\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"2\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"3\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"4\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"5\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"6\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"7\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"8\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"9\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"10\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112]\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"10 rows × 3 columns\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticle_id\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline_embedding\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec_to_json(vec_slice(headline_embedding, 0, 8)) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n",
"\u001b[0m\u001b[0m"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Preview 10 stored rows: the raw embedding blob plus a slice of its leading\n",
"-- components rendered as JSON so the values are human-readable.\n",
"select article_id, headline_embedding, vec_to_json(vec_slice(headline_embedding, 0, 8))\n",
"from vec_headlines\n",
"limit 10;"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Solite",
"language": "sql",
"name": "solite"
},
"language_info": {
"file_extension": ".sql",
"mimetype": "text/x.sqlite",
"name": "sql",
"nb_converter": "script",
"pygments_lexer": "sql",
"version": "TODO"
}
},
"nbformat": 4,
"nbformat_minor": 2
}