sqlite-vec/examples/nbc-headlines/2_build.ipynb
2024-09-07 09:22:21 -07:00

559 lines
26 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[no code]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
".open tmp3.db"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"<th>\n",
"schema\n",
"</th>\n",
"<th>\n",
"name\n",
"</th>\n",
"<th>\n",
"type\n",
"</th>\n",
"<th>\n",
"ncol\n",
"</th>\n",
"<th>\n",
"wr\n",
"</th>\n",
"<th>\n",
"strict\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"main\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"sqlite_sequence\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"2\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"main\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"articles\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"9\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"main\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"sqlite_schema\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"5\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"temp\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"sqlite_temp_schema\n",
"</td>\n",
"<td style=\"text-align: left;\">\n",
"table\n",
"</td>\n",
"<td >\n",
"5\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"<td >\n",
"0\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"4 rows × 6 columns\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┐\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mschema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mname \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtype \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mncol\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mwr\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mstrict\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┤\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_sequence \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticles \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_schema \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtemp \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_temp_schema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┘\n",
"\u001b[0m\u001b[0m"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- List every table visible to this connection, one row per table.\n",
"select * from pragma_table_list;"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"0 row × 0 column\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Full-text index over articles.headline. This is an external-content FTS5\n",
"-- table: only the index is stored here, and column values are read back from\n",
"-- articles by matching fts_headlines.rowid against articles.id.\n",
"create virtual table fts_headlines using fts5(\n",
"  headline,\n",
"  content='articles', content_rowid='id'\n",
");\n",
"\n",
"-- External-content tables are not populated automatically. Key every index\n",
"-- entry by id, the same column named in content_rowid, so each entry resolves\n",
"-- to the row whose headline was indexed.\n",
"insert into fts_headlines(rowid, headline)\n",
"  select id, headline\n",
"  from articles;\n",
"\n",
"-- Merge the index b-trees into a single structure to speed up later queries.\n",
"insert into fts_headlines(fts_headlines) values('optimize');"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"<th>\n",
"headline\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Washington state faces first outbreak of a deadly fungal infection that&#39;s on the rise in the U.S.\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Israel-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"House to vote on an expanded child tax credit bill\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Travel costs, staff and ads added up before Ron DeSantis dropped out\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Victims of Hamas attack in Israel and their families blame Iran in new federal lawsuit\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Trump meets with Teamsters as he targets Biden support\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"The bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Machu Picchu tourism suffering after week of protests against new ticketing system\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"FCC moves to criminalize most AI-generated robocalls\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td style=\"text-align: left;\">\n",
"Civil rights group says N.C. public schools are harming LGBTQ students, violating federal law\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"10 rows × 1 column\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWashington state faces first outbreak of a deadly fungal infection that's on the rise in the U.S. \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIsrael-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHouse to vote on an expanded child tax credit bill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTravel costs, staff and ads added up before Ron DeSantis dropped out \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVictims of Hamas attack in Israel and their families blame Iran in new federal lawsuit \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump meets with Teamsters as he targets Biden support \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mThe bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMachu Picchu tourism suffering after week of protests against new ticketing system \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFCC moves to criminalize most AI-generated robocalls \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mCivil rights group says N.C. public schools are harming LGBTQ students, violating federal law \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n",
"\u001b[0m\u001b[0m"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Spot-check the index: headline is the table's only visible column.\n",
"select headline\n",
"from fts_headlines\n",
"limit 10;"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"0 row × 0 column\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
".load ./lembed0\n",
".load ../../dist/vec0\n",
"\n",
"-- Register the local GGUF model file under the name 'default'.\n",
"insert into lembed_models(name, model)\n",
"values ('default', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf'));"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"-- Vector table: one 384-float embedding per article, keyed by article_id.\n",
"create virtual table vec_headlines using vec0(\n",
"  article_id integer primary key,\n",
"  headline_embedding float[384]\n",
");\n",
"\n",
"-- Embed every headline with lembed() and store it under the article's rowid.\n",
"insert into vec_headlines(article_id, headline_embedding)\n",
"  select articles.rowid, lembed(articles.headline)\n",
"  from articles;"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table>\n",
"<thead>\n",
"<tr style=\"text-align: center;\">\n",
"<th>\n",
"article_id\n",
"</th>\n",
"<th>\n",
"headline_embedding\n",
"</th>\n",
"<th>\n",
"vec_to_json(vec_slice(headline_embedding, 0, 8))\n",
"</th>\n",
"</tr>\n",
"</thead>\n",
"<tbody>\n",
"<tr>\n",
"<td >\n",
"1\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"2\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"3\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"4\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"5\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"6\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"7\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"8\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"9\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123]\n",
"</td>\n",
"</tr>\n",
"<tr>\n",
"<td >\n",
"10\n",
"</td>\n",
"<td style=\"color: blue\">\n",
"Blob&lt;1536&gt;\n",
"</td>\n",
"<td style=\"color: red\">\n",
"[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112]\n",
"</td>\n",
"</tr>\n",
"</tbody>\n",
"</table>\n",
"<div style=\"text-align: right;\">\n",
"10 rows × 3 columns\n",
"</div>\n",
"</div>\n"
],
"text/plain": [
"\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticle_id\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline_embedding\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec_to_json(vec_slice(headline_embedding, 0, 8)) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n",
"\u001b[0m\u001b[0m"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"-- Preview ten stored rows: the key, the raw embedding blob, and the first\n",
"-- eight elements of that embedding rendered as JSON text.\n",
"select article_id, headline_embedding, vec_to_json(vec_slice(headline_embedding, 0, 8))\n",
"from vec_headlines limit 10;"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Solite",
"language": "sql",
"name": "solite"
},
"language_info": {
"file_extension": ".sql",
"mimetype": "text/x.sqlite",
"name": "sql",
"nb_converter": "script",
"pygments_lexer": "sql",
"version": "TODO"
}
},
"nbformat": 4,
"nbformat_minor": 2
}