diff --git a/examples/nbc-headlines/1_scrape.ipynb b/examples/nbc-headlines/1_scrape.ipynb new file mode 100644 index 0000000..cb012a1 --- /dev/null +++ b/examples/nbc-headlines/1_scrape.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "import { Database, Statement } from \"jsr:@db/sqlite@0.11\";\n", + "import { parseHTML } from \"npm:linkedom\";\n", + "import * as d3 from \"npm:d3-time\";\n", + "import * as sqlitePath from \"npm:sqlite-path\";\n", + "import * as sqliteUrl from \"npm:sqlite-url\";\n", + "import * as sqliteRegex from \"npm:sqlite-regex\";\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "const months = [\"january\", \"february\", \"march\", \"april\", \"may\", \"june\", \"july\", \"august\", \"september\", \"october\", \"november\", \"december\"]\n", + "\n", + "class Db {\n", + " db: Database;\n", + " #stmtInsertArticle: Statement;\n", + "\n", + " constructor(path:string) {\n", + " this.db = new Database(path);\n", + " this.db.enableLoadExtension = true;\n", + " this.db.loadExtension(sqlitePath.getLoadablePath());\n", + " this.db.loadExtension(sqliteUrl.getLoadablePath());\n", + " this.db.loadExtension(sqliteRegex.getLoadablePath());\n", + " this.db.enableLoadExtension = false;\n", + "\n", + " this.db.exec(`\n", + " CREATE TABLE IF NOT EXISTS articles(\n", + " id integer primary key autoincrement,\n", + " year integer,\n", + " month integer,\n", + " slug TEXT,\n", + " slug_id TEXT,\n", + " headline TEXT,\n", + " url TEXT,\n", + " category1 TEXT,\n", + " category2 TEXT\n", + " )\n", + " `);\n", + "\n", + " this.#stmtInsertArticle = this.db.prepare(`\n", + " insert into articles(year, month, slug, slug_id, headline, url, category1, category2)\n", + " select\n", + " :year as year,\n", + " :month as month,\n", + " regex_capture(\n", + " '(?P.+)-(?P[^-]+)$',\n", + " path_at(url_path(:url), -1),\n", + " 'slug'\n", + " ) as slug,\n", + " regex_capture(\n", + " '(?P.+)-(?P[^-]+)$',\n", + " path_at(url_path(:url), -1),\n", + " 'id'\n", + " ) as slug_id,\n", + " :headline as headline,\n", + " :url as url,\n", + " path_at(url_path(:url), 0) as category1,\n", + " iif(\n", + " path_length(url_path(:url)) > 2,\n", + " path_at(url_path(:url), 1),\n", + " null\n", + " ) as category2\n", + " `);\n", + " }\n", + "\n", + " insertArticles(year:number, month:text, articles:{url: string, year: number, month: number}[]) {\n", + " const tx = this.db.transaction((year, month, articles) => {\n", + " for(const article of articles) {\n", + " this.#stmtInsertArticle.run({...article, year, month})\n", + " }\n", + " });\n", + " tx(year, month, articles);\n", + " }\n", + "}\n", + "\n", + "async function insertMonth(db: Db, year:number, month: text) {\n", + " let url = `https://www.nbcnews.com/archive/articles/${year}/${month}`;\n", + " while(true) {\n", + " const monthPage = await fetch(url).then(r=>r.text())\n", + " const {document:monthPageDoc} = parseHTML(monthPage);\n", + " const monthEntries = monthPageDoc\n", + " .querySelectorAll('.MonthPage a')\n", + " .map(a => ({headline: a.innerText, url: a.getAttribute('href')}));\n", + " db.insertArticles(year, months.findIndex(m => m === month)+1, monthEntries);\n", + " const next = monthPageDoc.querySelector('a.Pagination__next.Pagination__enable');\n", + " if(!next) {\n", + " break;\n", + " }\n", + " url = `https://www.nbcnews.com${next.getAttribute('href')}`;\n", + " }\n", + "\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "async function backfill(db, start: Date, end: Date) {\n", + " const targets = d3.timeMonths(start, end)\n", + " .map(date => ({year: date.getFullYear(), monthIndex: date.getMonth()}));\n", + " for(const target of targets) {\n", + " console.log(`${target.year} ${target.monthIndex}`)\n", + " await insertMonth(db, target.year, months[target.monthIndex]);\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024 0\n", + "2024 1\n", + "2024 2\n", + "2024 3\n", + "2024 4\n", + "2024 5\n", + "2024 6\n", + "2024 7\n" + ] + }, + { + "data": { + "text/plain": [ + "\u001b[33m1\u001b[39m" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const db = new Db(\":memory:\");\n", + "await backfill(db, new Date('2024-01-01'), new Date())\n", + "db.db.exec(\"vacuum into 'headlines-2024.db'\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Deno", + "language": "typescript", + "name": "deno" + }, + "language_info": { + "codemirror_mode": "typescript", + "file_extension": ".ts", + "mimetype": "text/x.typescript", + "name": "typescript", + "nbconvert_exporter": "script", + "pygments_lexer": "typescript", + "version": "5.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nbc-headlines/2_build.ipynb b/examples/nbc-headlines/2_build.ipynb new file mode 100644 index 0000000..7fe79fb --- /dev/null +++ b/examples/nbc-headlines/2_build.ipynb @@ -0,0 +1,559 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[no code]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".open tmp3.db" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "schema\n", + "\n", + "name\n", + "\n", + "type\n", + "\n", + "ncol\n", + "\n", + "wr\n", + "\n", + "strict\n", + "
\n", + "main\n", + "\n", + "sqlite_sequence\n", + "\n", + "table\n", + "\n", + "2\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "main\n", + "\n", + "articles\n", + "\n", + "table\n", + "\n", + "9\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "main\n", + "\n", + "sqlite_schema\n", + "\n", + "table\n", + "\n", + "5\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "temp\n", + "\n", + "sqlite_temp_schema\n", + "\n", + "table\n", + "\n", + "5\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "
\n", + "4 rows × 6 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mschema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mname \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtype \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mncol\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mwr\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mstrict\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_sequence \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticles \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_schema \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtemp \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_temp_schema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select * from pragma_table_list;" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "create virtual table fts_headlines using fts5(\n", + " headline,\n", + " content='articles', content_rowid='id'\n", + ");\n", + "\n", + "insert into fts_headlines(rowid, headline)\n", + " select rowid, headline\n", + " from articles;\n", + "\n", + "insert into fts_headlines(fts_headlines) values('optimize');" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "headline\n", + "
\n", + "Washington state faces first outbreak of a deadly fungal infection that's on the rise in the U.S.\n", + "
\n", + "Israel-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\n", + "
\n", + "House to vote on an expanded child tax credit bill\n", + "
\n", + "Travel costs, staff and ads added up before Ron DeSantis dropped out\n", + "
\n", + "Victims of Hamas attack in Israel and their families blame Iran in new federal lawsuit\n", + "
\n", + "Trump meets with Teamsters as he targets Biden support\n", + "
\n", + "The bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says\n", + "
\n", + "Machu Picchu tourism suffering after week of protests against new ticketing system\n", + "
\n", + "FCC moves to criminalize most AI-generated robocalls\n", + "
\n", + "Civil rights group says N.C. public schools are harming LGBTQ students, violating federal law\n", + "
\n", + "
\n", + "10 rows × 1 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWashington state faces first outbreak of a deadly fungal infection that's on the rise in the U.S. \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIsrael-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHouse to vote on an expanded child tax credit bill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTravel costs, staff and ads added up before Ron DeSantis dropped out \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVictims of Hamas attack in Israel and their families blame Iran in new federal lawsuit \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump meets with Teamsters as he targets Biden support \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mThe bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMachu Picchu tourism suffering after week of protests against new ticketing system \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFCC moves to criminalize most AI-generated robocalls \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mCivil rights group says N.C. public schools are harming LGBTQ students, violating federal law \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select * from fts_headlines limit 10;" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".load ./lembed0\n", + ".load ../../dist/vec0\n", + "\n", + "insert into lembed_models(name, model) values\n", + " ('default', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf'));" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "create virtual table vec_headlines using vec0(\n", + " article_id integer primary key,\n", + " headline_embedding float[384]\n", + ");\n", + "\n", + "insert into vec_headlines(article_id, headline_embedding)\n", + "select\n", + " rowid,\n", + " lembed(headline)\n", + "from articles;" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "article_id\n", + "\n", + "headline_embedding\n", + "\n", + "vec_to_json(vec_slice(headline_embedding, 0, 8))\n", + "
\n", + "1\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868]\n", + "
\n", + "2\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660]\n", + "
\n", + "3\n", + "\n", + "Blob<1536>\n", + "\n", + "[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258]\n", + "
\n", + "4\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898]\n", + "
\n", + "5\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776]\n", + "
\n", + "6\n", + "\n", + "Blob<1536>\n", + "\n", + "[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\n", + "
\n", + "7\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644]\n", + "
\n", + "8\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469]\n", + "
\n", + "9\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123]\n", + "
\n", + "10\n", + "\n", + "Blob<1536>\n", + "\n", + "[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112]\n", + "
\n", + "
\n", + "10 rows × 3 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticle_id\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline_embedding\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec_to_json(vec_slice(headline_embedding, 0, 8)) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select\n", + " article_id,\n", + " headline_embedding,\n", + " vec_to_json(vec_slice(headline_embedding, 0, 8))\n", + "from vec_headlines\n", + "limit 10;" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Solite", + "language": "sql", + "name": "solite" + }, + "language_info": { + "file_extension": ".sql", + "mimetype": "text/x.sqlite", + "name": "sql", + "nb_converter": "script", + "pygments_lexer": "sql", + "version": "TODO" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nbc-headlines/3_search.ipynb b/examples/nbc-headlines/3_search.ipynb new file mode 100644 index 0000000..b033b78 --- /dev/null +++ b/examples/nbc-headlines/3_search.ipynb @@ -0,0 +1,1276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "vec_version()\n", + "\n", + "lembed_version()\n", + "
\n", + "v0.1.2-alpha.6\n", + "\n", + "v0.0.1-alpha.8\n", + "
\n", + "
\n", + "1 row × 2 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec_version() \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mlembed_version()\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.1.2-alpha.6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.0.1-alpha.8 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".open tmp3.db\n", + ".load ../../dist/vec0\n", + ".load ./lembed0\n", + "\n", + "insert into lembed_models(name, model)\n", + " values (\n", + " 'default',\n", + " lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf')\n", + " );\n", + "\n", + "select vec_version(), lembed_version();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FTS Search" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "rowid\n", + "\n", + "headline_highlighted\n", + "\n", + "rank\n", + "
\n", + "4666\n", + "\n", + "Kamala Harris visits <b>Planned</b> <b>Parenthood</b> clinic\n", + "\n", + "-18.9139950477264\n", + "
\n", + "6521\n", + "\n", + "Former Marine sentenced to 9 years in prison for firebombing <b>Planned</b> <b>Parenthood</b> clinic\n", + "\n", + "-14.807022703838651\n", + "
\n", + "
\n", + "2 rows × 3 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mrowid\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline_highlighted \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mrank \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -18.9139950477264\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6521\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-14.807022703838651\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".param set query planned parenthood\n", + "\n", + "select\n", + " rowid,\n", + " highlight(fts_headlines, 0, '', '') as headline_highlighted,\n", + " rank\n", + "from fts_headlines\n", + "where headline match :query\n", + "order by rank\n", + "limit 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vector Search" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "article_id\n", + "\n", + "headline\n", + "\n", + "distance\n", + "
\n", + "9475\n", + "\n", + "Inside a Gen Z campaign to shake up Congress\n", + "\n", + "1.0655490159988403\n", + "
\n", + "11236\n", + "\n", + "Bunnie XO and Jelly Roll announce plans to have a baby using IVF\n", + "\n", + "1.071580171585083\n", + "
\n", + "4110\n", + "\n", + "First over-the-counter birth control pill hits store shelves\n", + "\n", + "1.0831280946731567\n", + "
\n", + "1808\n", + "\n", + "More nonpregnant women are requesting abortion pills to have on hand\n", + "\n", + "1.0897283554077148\n", + "
\n", + "11154\n", + "\n", + "Meet the anti-abortion group using white coats and research to advance its cause\n", + "\n", + "1.089759349822998\n", + "
\n", + "5980\n", + "\n", + "Deathbed confession leads to bodies of mother and daughter killed 24 years ago\n", + "\n", + "1.092090368270874\n", + "
\n", + "3540\n", + "\n", + "Holding photos of their deceased children, parents lobby Congress to pass online safety legislation\n", + "\n", + "1.1028637886047363\n", + "
\n", + "4109\n", + "\n", + "Alabama fertility clinic at the center of IVF ruling is resuming some procedures\n", + "\n", + "1.103183627128601\n", + "
\n", + "4889\n", + "\n", + "Inside the organized crime rings plaguing Ulta, T.J. Maxx, Walgreens and other retailers\n", + "\n", + "1.1056489944458008\n", + "
\n", + "86\n", + "\n", + "Florida bars transgender people from changing the sex on their driver's licenses\n", + "\n", + "1.1073163747787476\n", + "
\n", + "
\n", + "10 rows × 3 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticle_id\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mdistance \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9475\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInside a Gen Z campaign to shake up Congress \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0655490159988403\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11236\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBunnie XO and Jelly Roll announce plans to have a baby using IVF \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.071580171585083\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4110\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFirst over-the-counter birth control pill hits store shelves \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0831280946731567\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1808\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMore nonpregnant women are requesting abortion pills to have on hand \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0897283554077148\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11154\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMeet the anti-abortion group using white coats and research to advance its cause \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.089759349822998\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5980\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mDeathbed confession leads to bodies of mother and daughter killed 24 years ago \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.092090368270874\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3540\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHolding photos of their deceased children, parents lobby Congress to pass online safety legislation\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1028637886047363\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4109\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAlabama fertility clinic at the center of IVF ruling is resuming some procedures \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.103183627128601\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4889\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInside the organized crime rings plaguing Ulta, T.J. Maxx, Walgreens and other retailers \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1056489944458008\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 86\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFlorida bars transgender people from changing the sex on their driver's licenses \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1073163747787476\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".param set query planned parenthood\n", + "\n", + "select\n", + " article_id,\n", + " articles.headline,\n", + " distance\n", + "from vec_headlines\n", + "left join articles on articles.rowid = vec_headlines.article_id\n", + "where headline_embedding match lembed(:query)\n", + " and k = 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FTS + Vector search: RFF" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "id\n", + "\n", + "headline\n", + "\n", + "vector_distance\n", + "\n", + "fts_score\n", + "\n", + "combined_score\n", + "
\n", + "86\n", + "\n", + "Florida bars transgender people from changing the sex on their driver's licenses\n", + "\n", + "1.1073163747787476\n", + "\n", + "\n", + "\n", + "0.00684931506849315\n", + "
\n", + "1808\n", + "\n", + "More nonpregnant women are requesting abortion pills to have on hand\n", + "\n", + "1.0897283554077148\n", + "\n", + "\n", + "\n", + "0.0005353319057815846\n", + "
\n", + "3540\n", + "\n", + "Holding photos of their deceased children, parents lobby Congress to pass online safety legislation\n", + "\n", + "1.1028637886047363\n", + "\n", + "\n", + "\n", + "0.0002777777777777778\n", + "
\n", + "4109\n", + "\n", + "Alabama fertility clinic at the center of IVF ruling is resuming some procedures\n", + "\n", + "1.103183627128601\n", + "\n", + "\n", + "\n", + "0.00023986567522187575\n", + "
\n", + "4110\n", + "\n", + "First over-the-counter birth control pill hits store shelves\n", + "\n", + "1.0831280946731567\n", + "\n", + "\n", + "\n", + "0.00023980815347721823\n", + "
\n", + "4666\n", + "\n", + "Kamala Harris visits Planned Parenthood clinic\n", + "\n", + "\n", + "\n", + "-18.9139950477264\n", + "\n", + "0.00021159542953872197\n", + "
\n", + "4889\n", + "\n", + "Inside the organized crime rings plaguing Ulta, T.J. Maxx, Walgreens and other retailers\n", + "\n", + "1.1056489944458008\n", + "\n", + "\n", + "\n", + "0.00020206102242877348\n", + "
\n", + "5980\n", + "\n", + "Deathbed confession leads to bodies of mother and daughter killed 24 years ago\n", + "\n", + "1.092090368270874\n", + "\n", + "\n", + "\n", + "0.00016556291390728477\n", + "
\n", + "6521\n", + "\n", + "Former Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\n", + "\n", + "\n", + "\n", + "-14.807022703838651\n", + "\n", + "0.000151952590791673\n", + "
\n", + "9475\n", + "\n", + "Inside a Gen Z campaign to shake up Congress\n", + "\n", + "1.0655490159988403\n", + "\n", + "\n", + "\n", + "0.0001048767697954903\n", + "
\n", + "11154\n", + "\n", + "Meet the anti-abortion group using white coats and research to advance its cause\n", + "\n", + "1.089759349822998\n", + "\n", + "\n", + "\n", + "0.00008917424647761727\n", + "
\n", + "11236\n", + "\n", + "Bunnie XO and Jelly Roll announce plans to have a baby using IVF\n", + "\n", + "1.071580171585083\n", + "\n", + "\n", + "\n", + "0.00008852691218130312\n", + "
\n", + "
\n", + "12 rows × 5 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mid \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvector_distance \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts_score \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcombined_score \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 86\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFlorida bars transgender people from changing the sex on their driver's licenses \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1073163747787476\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.00684931506849315\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1808\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMore nonpregnant women are requesting abortion pills to have on hand \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0897283554077148\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.0005353319057815846\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3540\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHolding photos of their deceased children, parents lobby Congress to pass online safety legislation\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1028637886047363\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.0002777777777777778\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4109\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAlabama fertility clinic at the center of IVF ruling is resuming some procedures \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.103183627128601\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00023986567522187575\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4110\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFirst over-the-counter birth control pill hits store shelves \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0831280946731567\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00023980815347721823\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -18.9139950477264\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00021159542953872197\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4889\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInside the organized crime rings plaguing Ulta, T.J. Maxx, Walgreens and other retailers \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1056489944458008\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00020206102242877348\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5980\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mDeathbed confession leads to bodies of mother and daughter killed 24 years ago \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.092090368270874\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00016556291390728477\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6521\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-14.807022703838651\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.000151952590791673\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9475\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInside a Gen Z campaign to shake up Congress \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0655490159988403\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.0001048767697954903\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11154\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMeet the anti-abortion group using white coats and research to advance its cause \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.089759349822998\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00008917424647761727\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11236\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBunnie XO and Jelly Roll announce plans to have a baby using IVF \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.071580171585083\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.00008852691218130312\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".param set query planned parenthood\n", + "\n", + "\n", + ".param set weight_fts 1.0\n", + ".param set weight_vec 1.0\n", + ".param set rrf_k 60\n", + ".param set k 10\n", + "\n", + "\n", + "with vec_matches as (\n", + " select\n", + " article_id,\n", + " row_number() over (order by distance) as rank_number,\n", + " distance\n", + " from vec_headlines\n", + " where\n", + " headline_embedding match lembed(:query)\n", + " and k = :k\n", + " order by distance\n", + "),\n", + "fts_matches as (\n", + " select\n", + " rowid,\n", + " row_number() over (order by rank) as rank_number,\n", + " rank as score\n", + " from fts_headlines\n", + " where headline match :query\n", + " limit :k\n", + "),\n", + "final as (\n", + " select\n", + " articles.id,\n", + " articles.headline,\n", + " vec_matches.distance as vector_distance,\n", + " fts_matches.score as fts_score,\n", + " coalesce(1.0 / (:rrf_k + fts_matches.rowid), 0.0) * :weight_fts +\n", + " coalesce(1.0 / (:rrf_k + vec_matches.article_id), 0.0) * :weight_vec\n", + " as combined_score\n", + "\n", + " from fts_matches\n", + " full outer join vec_matches on vec_matches.article_id = fts_matches.rowid\n", + " join articles on articles.rowid = coalesce(fts_matches.rowid, vec_matches.article_id)\n", + " order by combined_score desc\n", + ")\n", + "select * from final;\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select json('[1,2,3,4]'), vec_f32(X'AABBCCDD')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "addr\n", + "\n", + "opcode\n", + "\n", + "p1\n", + "\n", + "p2\n", + "\n", + "p3\n", + "\n", + "p4\n", + "\n", + "p5\n", + "\n", + "comment\n", + "\n", + "subprog\n", + "\n", + "nexec\n", + "\n", + "ncycle\n", + "
\n", + "0\n", + "\n", + "Init\n", + "\n", + "0\n", + "\n", + "12\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "1\n", + "\n", + "Null\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "1\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "2\n", + "\n", + "VOpen\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "vtab:7F7FCCF08AE0\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "3\n", + "\n", + "Integer\n", + "\n", + "0\n", + "\n", + "2\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "4\n", + "\n", + "Integer\n", + "\n", + "0\n", + "\n", + "3\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "5\n", + "\n", + "VFilter\n", + "\n", + "0\n", + "\n", + "8\n", + "\n", + "2\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "6\n", + "\n", + "AggStep\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "count(0)\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "7\n", + "\n", + "VNext\n", + "\n", + "0\n", + "\n", + "6\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "8\n", + "\n", + "AggFinal\n", + "\n", + "1\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "count(0)\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "9\n", + "\n", + "Copy\n", + "\n", + "1\n", + "\n", + "4\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "10\n", + "\n", + "ResultRow\n", + "\n", + "4\n", + "\n", + "1\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "11\n", + "\n", + "Halt\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "12\n", + "\n", + "Transaction\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "11\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "13\n", + "\n", + "Goto\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "
\n", + "14 rows × 11 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0maddr\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mopcode \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp4 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcomment\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msubprog\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mnexec\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mncycle\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInit \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m12\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mNull \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVOpen \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvtab:7F7FCCF08AE0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInteger \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInteger \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVFilter \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAggStep \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcount(0) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVNext \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAggFinal \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcount(0) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mCopy \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mResultRow \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHalt \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 12\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransaction\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 13\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mGoto \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select * from bytecode('select count(*) from pragma_table_list;')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Solite", + "language": "sql", + "name": "solite" + }, + "language_info": { + "file_extension": ".sql", + "mimetype": "text/x.sqlite", + "name": "sql", + "nb_converter": "script", + "pygments_lexer": "sql", + "version": "TODO" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nbc-headlines/Makefile b/examples/nbc-headlines/Makefile index 2f2d876..8361d12 100644 --- a/examples/nbc-headlines/Makefile +++ b/examples/nbc-headlines/Makefile @@ -1,8 +1,2 @@ -deps: - curl -L https://github.com/asg017/sqlite-rembed/releases/download/v0.0.1-alpha.9/install.sh | sh - curl -L https://github.com/asg017/sqlite-vec/releases/download/v0.0.1-alpha.37/install.sh | sh - wget https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.11/llamafile-0.8.11 - wget https://huggingface.co/leliuga/all-MiniLM-L6-v2-GGUF/resolve/main/all-MiniLM-L6-v2.F16.gguf - - -.PHONY: deps +all-MiniLM-L6-v2.e4ce9877.q8_0.gguf: + curl -L -o $@ https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/main/all-MiniLM-L6-v2/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf diff --git a/examples/nbc-headlines/README.md b/examples/nbc-headlines/README.md index 804b11a..b77d76a 100644 --- a/examples/nbc-headlines/README.md +++ b/examples/nbc-headlines/README.md @@ -1 +1,4 @@ -https://www.nbcnews.com/archive/articles/last-seven-days +- `headlines-2024.db` + - 14.5k rows + - 4.4MB + diff --git a/examples/nbc-headlines/bench.sql b/examples/nbc-headlines/bench.sql new file mode 100644 index 0000000..6f9781d --- /dev/null +++ b/examples/nbc-headlines/bench.sql @@ -0,0 +1,26 @@ +.open tmp.db +--.load ./vec0 +.load ./lembed0 +.timer on + +insert into lembed_models(name, model) + values ( + 'default', + lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf') + ); + +with subset as ( + select headline from articles limit 1000 +) +select sum(lembed(headline)) from subset; + + +.load ./rembed0 + +insert into rembed_clients(name, options) + values ('default','llamafile'); + +with subset as ( + select headline from articles limit 1000 +) +select sum(rembed('default', headline)) from subset; diff --git a/examples/nbc-headlines/build.sql b/examples/nbc-headlines/build.sql index b397798..626b111 100644 --- a/examples/nbc-headlines/build.sql +++ b/examples/nbc-headlines/build.sql @@ -2,30 +2,44 @@ .header on .bail on -.load ./vec0 -.load ./rembed0 +begin; -insert into rembed_clients(name, options) - values ('llamafile', 'llamafile'); - -create table articles as -select - value ->> 'url' as url, - value ->> 'headline' as headline, - rembed('llamafile', value ->> 'headline') as headline_embedding -from json_each( - readfile('2024-07-26.json') +create virtual table fts_headlines using fts5( + headline, + content='articles', content_rowid='id' ); -select writefile( - 'articles.json', - json_group_array( - json_object( - 'id', rowid, - 'url', url, - 'headline', headline, - 'headline_embedding', vec_to_json(headline_embedding) - ) - ) -) +insert into fts_headlines(rowid, headline) + select rowid, headline + from articles; + +INSERT INTO fts_headlines(fts_headlines) VALUES('optimize'); + +.timer on + +.load ../../dist/vec0 +.load ./lembed0 + +insert into lembed_models(name, model) values + ('default', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf')); + +create virtual table vec_headlines using vec0( + article_id integer primary key, + headline_embedding float[384] +); + +-- 1m23s +insert into vec_headlines(article_id, headline_embedding) +select + rowid, + lembed(headline) from articles; +commit; + + +-- rembed vec0 INSERT: 10m17s +-- before: 4.37 MB +-- /w fts content: 5.35 MB (+0.98 MB) +-- with optimize 5.30 MB (-0.049 MB) +-- w/ fts: 6.67 MB (+2.30 MB) +-- sum(octet_length(headline)): 1.16 MB diff --git a/examples/nbc-headlines/demo.mjs b/examples/nbc-headlines/demo.mjs deleted file mode 100644 index 5b861a5..0000000 --- a/examples/nbc-headlines/demo.mjs +++ /dev/null @@ -1,34 +0,0 @@ -import Database from "better-sqlite3"; -import * as sqliteVec from "sqlite-vec"; -import { pipeline } from "@xenova/transformers"; - -const db = new Database("articles.db"); -sqliteVec.load(db); - -const extractor = await pipeline( - "feature-extraction", - "Xenova/all-MiniLM-L6-v2" -); - -const query = "sports"; -const queryEmbedding = await extractor([query], { - pooling: "mean", - normalize: true, -}); - -const rows = db - .prepare( - ` - select - article_id, - headline, - distance - from vec_articles - left join articles on articles.id = vec_articles.article_id - where headline_embedding match ? - and k = 8; -` - ) - .all(queryEmbedding.data); - -console.log(rows); diff --git a/examples/nbc-headlines/demo.py b/examples/nbc-headlines/demo.py deleted file mode 100644 index 4b643d8..0000000 --- a/examples/nbc-headlines/demo.py +++ /dev/null @@ -1,31 +0,0 @@ -import sqlite3 -import sqlite_vec -from sentence_transformers import SentenceTransformer - -db = sqlite3.connect("articles.db") -db.enable_load_extension(True) -sqlite_vec.load(db) -db.enable_load_extension(False) - -model = SentenceTransformer("all-MiniLM-L6-v2") - - -query = "sports" -query_embedding = model.encode(query) - -results = db.execute( - """ - select - article_id, - headline, - distance - from vec_articles - left join articles on articles.id = vec_articles.article_id - where headline_embedding match ? - and k = 8; - """, - [query_embedding] -).fetchall() - -for (article_id, headline, distance) in results: - print(article_id, headline, distance) diff --git a/examples/nbc-headlines/demo.sql b/examples/nbc-headlines/demo.sql deleted file mode 100644 index 0ab0f55..0000000 --- a/examples/nbc-headlines/demo.sql +++ /dev/null @@ -1,54 +0,0 @@ -.load vec0 -.header on -.bail on -.timer on - -create temp table raw_articles as - select - value ->> 'id' as id, - value ->> 'url' as url, - value ->> 'headline' as headline, - value ->> 'headline_embedding' as headline_embedding - from json_each( - readfile('articles.json') - ); - -create table articles( - id integer primary key, - headline text, - url text -); - -insert into articles(id, headline, url) - select id, headline, url from temp.raw_articles; - -select * from articles limit 5; - -create virtual table vec_articles using vec0( - article_id integer primary key, - headline_embedding float[384] -); - -insert into vec_articles(article_id, headline_embedding) - select id, headline_embedding from temp.raw_articles; - - -select * from articles limit 5; -select article_id, vec_to_json(headline_embedding) from articles limit 5; - -.param set :query 'sports' - -.load ./rembed0 -insert into rembed_clients values ('all-MiniLM-L6-v2', 'llamafile'); - - -.mode qbox -ww - -select - article_id, - --headline, - distance -from vec_articles ---left join articles on articles.id = vec_articles.article_id -where headline_embedding match rembed('all-MiniLM-L6-v2', :query) - and k = 10; diff --git a/examples/nbc-headlines/hybrid.ipynb b/examples/nbc-headlines/hybrid.ipynb new file mode 100644 index 0000000..ac4e79d --- /dev/null +++ b/examples/nbc-headlines/hybrid.ipynb @@ -0,0 +1,897 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[no code]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".open tmp.db\n", + ".load ../../dist/vec0\n", + ".load ./rembed0\n", + "\n", + "insert into rembed_clients(name, options)\n", + " values ('snowflake-arctic-embed-m-v1.5', 'llamafile');\n", + "\n", + "--select vec_version(), rembed_debug();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FTS Search" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "rowid\n", + "\n", + "headline_highlighted\n", + "\n", + "rank\n", + "
\n", + "4666\n", + "\n", + "Kamala Harris visits <b>Planned</b> <b>Parenthood</b> clinic\n", + "\n", + "-18.9139950477264\n", + "
\n", + "6521\n", + "\n", + "Former Marine sentenced to 9 years in prison for firebombing <b>Planned</b> <b>Parenthood</b> clinic\n", + "\n", + "-14.807022703838651\n", + "
\n", + "
\n", + "2 rows × 3 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mrowid\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline_highlighted \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mrank \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -18.9139950477264\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6521\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-14.807022703838651\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".param set query planned parenthood\n", + "\n", + "select\n", + " rowid,\n", + " highlight(fts_headlines, 0, '', '') as headline_highlighted,\n", + " rank\n", + "from fts_headlines\n", + "where headline match :query\n", + "order by rank\n", + "limit 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vector Search" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "ename": "SQL logic error", + "evalue": "Error sending HTTP request: http://localhost:8080/embedding: Connection Failed: Connect error: Connection refused (os error 61)", + "output_type": "error", + "traceback": [] + } + ], + "source": [ + ".param set query planned parenthood\n", + "\n", + "select\n", + " article_id,\n", + " articles.headline,\n", + " distance\n", + "from vec_headlines\n", + "left join articles on articles.rowid = vec_headlines.article_id\n", + "where headline_embedding match vec_normalize(vec_slice(rembed('snowflake-arctic-embed-m-v1.5', :query), 0, 256))\n", + " and k = 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## FTS + Vector search: RFF" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "ename": "SQL logic error", + "evalue": "SQL logic error (1) : Error sending HTTP request: http://localhost:8080/embedding: Connection Failed: Connect error: Connection refused (os error 61)", + "output_type": "error", + "traceback": [] + } + ], + "source": [ + ".param set query planned parenthood\n", + "\n", + "insert or replace into temp.sqlite_parameters\n", + " values ('query_embedding', rembed('snowflake-arctic-embed-m-v1.5', :query));\n", + "\n", + ".param set weight_fts 1.0\n", + ".param set weight_vec 1.0\n", + ".param set rrf_k 60\n", + ".param set k 10\n", + "\n", + "\n", + "with vec_matches as (\n", + " select\n", + " article_id,\n", + " row_number() over (order by distance) as rank_number,\n", + " distance\n", + " from vec_headlines\n", + " where\n", + " headline_embedding match vec_slice(:query_embedding, 0, 256)\n", + " and k = :k\n", + " order by distance\n", + "),\n", + "fts_matches as (\n", + " select\n", + " rowid,\n", + " row_number() over (order by rank) as rank_number,\n", + " rank as score\n", + " from fts_headlines\n", + " where headline match :query\n", + " limit :k\n", + "),\n", + "final as (\n", + " select\n", + " articles.id,\n", + " articles.headline,\n", + " vec_matches.distance as vector_distance,\n", + " fts_matches.score as fts_score,\n", + " coalesce(1.0 / (:rrf_k + fts_matches.rowid), 0.0) * :weight_fts +\n", + " coalesce(1.0 / (:rrf_k + vec_matches.article_id), 0.0) * :weight_vec\n", + " as combined_score\n", + "\n", + " from fts_matches\n", + " full outer join vec_matches on vec_matches.article_id = fts_matches.rowid\n", + " join articles on articles.rowid = coalesce(fts_matches.rowid, vec_matches.article_id)\n", + " order by combined_score desc\n", + ")\n", + "select * from final;\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "json('[1,2,3,4]')\n", + "\n", + "vec_f32(X'AABBCCDD')\n", + "
\n", + "[1,2,3,4]\n", + "\n", + "Blob<4>\n", + "
\n", + "
\n", + "1 row × 2 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mjson('[1,2,3,4]')\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec_f32(X'AABBCCDD')\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[1,2,3,4] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<4> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select json('[1,2,3,4]'), vec_f32(X'AABBCCDD')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "addr\n", + "\n", + "opcode\n", + "\n", + "p1\n", + "\n", + "p2\n", + "\n", + "p3\n", + "\n", + "p4\n", + "\n", + "p5\n", + "\n", + "comment\n", + "\n", + "subprog\n", + "\n", + "nexec\n", + "\n", + "ncycle\n", + "
\n", + "0\n", + "\n", + "Init\n", + "\n", + "0\n", + "\n", + "12\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "1\n", + "\n", + "Null\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "1\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "2\n", + "\n", + "VOpen\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "vtab:7FAC27505B30\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "3\n", + "\n", + "Integer\n", + "\n", + "0\n", + "\n", + "2\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "4\n", + "\n", + "Integer\n", + "\n", + "0\n", + "\n", + "3\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "5\n", + "\n", + "VFilter\n", + "\n", + "0\n", + "\n", + "8\n", + "\n", + "2\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "6\n", + "\n", + "AggStep\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "count(0)\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "7\n", + "\n", + "VNext\n", + "\n", + "0\n", + "\n", + "6\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "8\n", + "\n", + "AggFinal\n", + "\n", + "1\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "count(0)\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "9\n", + "\n", + "Copy\n", + "\n", + "1\n", + "\n", + "4\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "10\n", + "\n", + "ResultRow\n", + "\n", + "4\n", + "\n", + "1\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "11\n", + "\n", + "Halt\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "12\n", + "\n", + "Transaction\n", + "\n", + "0\n", + "\n", + "0\n", + "\n", + "11\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "13\n", + "\n", + "Goto\n", + "\n", + "0\n", + "\n", + "1\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "0\n", + "
\n", + "
\n", + "14 rows × 11 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0maddr\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mopcode \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp4 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mp5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcomment\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msubprog\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mnexec\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mncycle\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInit \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m12\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mNull \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVOpen \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvtab:7FAC27505B30\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInteger \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInteger \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVFilter \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAggStep \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcount(0) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVNext \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAggFinal \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mcount(0) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mCopy \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mResultRow \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHalt \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 12\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransaction\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 13\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mGoto \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[3m NULL \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select * from bytecode('select count(*) from pragma_table_list;')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Solite", + "language": "sql", + "name": "solite" + }, + "language_info": { + "file_extension": ".sql", + "mimetype": "text/x.sqlite", + "name": "sqlite", + "nb_converter": "script", + "pygments_lexer": "sql", + "version": "TODO" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nbc-headlines/index.html b/examples/nbc-headlines/index.html deleted file mode 100644 index a721ddb..0000000 --- a/examples/nbc-headlines/index.html +++ /dev/null @@ -1,56 +0,0 @@ - - -

sqlite-vec articles.db demo

- - - - diff --git a/examples/nbc-headlines/package-lock.json b/examples/nbc-headlines/package-lock.json deleted file mode 100644 index cb833d9..0000000 --- a/examples/nbc-headlines/package-lock.json +++ /dev/null @@ -1,801 +0,0 @@ -{ - "name": "nbc-headlines", - "version": "1.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "nbc-headlines", - "version": "1.0.0", - "license": "ISC", - "dependencies": { - "@xenova/transformers": "^2.17.2", - "better-sqlite3": "^11.1.2", - "sqlite-vec": "^0.0.1-alpha.37" - } - }, - "node_modules/@huggingface/jinja": { - "version": "0.2.2", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/@protobufjs/aspromise": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", - "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/base64": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", - "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/codegen": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", - "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/eventemitter": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", - "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/fetch": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", - "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", - "license": "BSD-3-Clause", - "dependencies": { - "@protobufjs/aspromise": "^1.1.1", - "@protobufjs/inquire": "^1.1.0" - } - }, - "node_modules/@protobufjs/float": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", - "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/inquire": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", - "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/path": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", - "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/pool": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", - "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/utf8": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", - "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", - "license": "BSD-3-Clause" - }, - "node_modules/@types/long": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz", - "integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==", - "license": "MIT" - }, - "node_modules/@types/node": { - "version": "20.14.12", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.12.tgz", - "integrity": "sha512-r7wNXakLeSsGT0H1AU863vS2wa5wBOK4bWMjZz2wj+8nBx+m5PeIn0k8AloSLpRuiwdRQZwarZqHE4FNArPuJQ==", - "license": "MIT", - "dependencies": { - "undici-types": "~5.26.4" - } - }, - "node_modules/@xenova/transformers": { - "version": "2.17.2", - "resolved": "https://registry.npmjs.org/@xenova/transformers/-/transformers-2.17.2.tgz", - "integrity": "sha512-lZmHqzrVIkSvZdKZEx7IYY51TK0WDrC8eR0c5IMnBsO8di8are1zzw8BlLhyO2TklZKLN5UffNGs1IJwT6oOqQ==", - "license": "Apache-2.0", - "dependencies": { - "@huggingface/jinja": "^0.2.2", - "onnxruntime-web": "1.14.0", - "sharp": "^0.32.0" - }, - "optionalDependencies": { - "onnxruntime-node": "1.14.0" - } - }, - "node_modules/b4a": { - "version": "1.6.6", - "license": "Apache-2.0" - }, - "node_modules/bare-events": { - "version": "2.4.2", - "license": "Apache-2.0", - "optional": true - }, - "node_modules/bare-fs": { - "version": "2.3.1", - "license": "Apache-2.0", - "optional": true, - "dependencies": { - "bare-events": "^2.0.0", - "bare-path": "^2.0.0", - "bare-stream": "^2.0.0" - } - }, - "node_modules/bare-os": { - "version": "2.4.0", - "license": "Apache-2.0", - "optional": true - }, - "node_modules/bare-path": { - "version": "2.1.3", - "license": "Apache-2.0", - "optional": true, - "dependencies": { - "bare-os": "^2.1.0" - } - }, - "node_modules/bare-stream": { - "version": "2.1.3", - "license": "Apache-2.0", - "optional": true, - "dependencies": { - "streamx": "^2.18.0" - } - }, - "node_modules/base64-js": { - "version": "1.5.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/better-sqlite3": { - "version": "11.1.2", - "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-11.1.2.tgz", - "integrity": "sha512-gujtFwavWU4MSPT+h9B+4pkvZdyOUkH54zgLdIrMmmmd4ZqiBIrRNBzNzYVFO417xo882uP5HBu4GjOfaSrIQw==", - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "bindings": "^1.5.0", - "prebuild-install": "^7.1.1" - } - }, - "node_modules/bindings": { - "version": "1.5.0", - "license": "MIT", - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, - "node_modules/bl": { - "version": "4.1.0", - "license": "MIT", - "dependencies": { - "buffer": "^5.5.0", - "inherits": "^2.0.4", - "readable-stream": "^3.4.0" - } - }, - "node_modules/buffer": { - "version": "5.7.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.1.13" - } - }, - "node_modules/chownr": { - "version": "1.1.4", - "license": "ISC" - }, - "node_modules/color": { - "version": "4.2.3", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1", - "color-string": "^1.9.0" - }, - "engines": { - "node": ">=12.5.0" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "license": "MIT" - }, - "node_modules/color-string": { - "version": "1.9.1", - "license": "MIT", - "dependencies": { - "color-name": "^1.0.0", - "simple-swizzle": "^0.2.2" - } - }, - "node_modules/decompress-response": { - "version": "6.0.0", - "license": "MIT", - "dependencies": { - "mimic-response": "^3.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/deep-extend": { - "version": "0.6.0", - "license": "MIT", - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/detect-libc": { - "version": "2.0.3", - "license": "Apache-2.0", - "engines": { - "node": ">=8" - } - }, - "node_modules/end-of-stream": { - "version": "1.4.4", - "license": "MIT", - "dependencies": { - "once": "^1.4.0" - } - }, - "node_modules/expand-template": { - "version": "2.0.3", - "license": "(MIT OR WTFPL)", - "engines": { - "node": ">=6" - } - }, - "node_modules/fast-fifo": { - "version": "1.3.2", - "license": "MIT" - }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "license": "MIT" - }, - "node_modules/flatbuffers": { - "version": "1.12.0", - "license": "SEE LICENSE IN LICENSE.txt" - }, - "node_modules/fs-constants": { - "version": "1.0.0", - "license": "MIT" - }, - "node_modules/github-from-package": { - "version": "0.0.0", - "license": "MIT" - }, - "node_modules/guid-typescript": { - "version": "1.0.9", - "license": "ISC" - }, - "node_modules/ieee754": { - "version": "1.2.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "BSD-3-Clause" - }, - "node_modules/inherits": { - "version": "2.0.4", - "license": "ISC" - }, - "node_modules/ini": { - "version": "1.3.8", - "license": "ISC" - }, - "node_modules/is-arrayish": { - "version": "0.3.2", - "license": "MIT" - }, - "node_modules/long": { - "version": "4.0.0", - "license": "Apache-2.0" - }, - "node_modules/mimic-response": { - "version": "3.1.0", - "license": "MIT", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/minimist": { - "version": "1.2.8", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/mkdirp-classic": { - "version": "0.5.3", - "license": "MIT" - }, - "node_modules/napi-build-utils": { - "version": "1.0.2", - "license": "MIT" - }, - "node_modules/node-abi": { - "version": "3.65.0", - "license": "MIT", - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/node-addon-api": { - "version": "6.1.0", - "license": "MIT" - }, - "node_modules/once": { - "version": "1.4.0", - "license": "ISC", - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/onnx-proto": { - "version": "4.0.4", - "license": "MIT", - "dependencies": { - "protobufjs": "^6.8.8" - } - }, - "node_modules/onnx-proto/node_modules/protobufjs": { - "version": "6.11.4", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.4.tgz", - "integrity": "sha512-5kQWPaJHi1WoCpjTGszzQ32PG2F4+wRY6BmAT4Vfw56Q2FZ4YZzK20xUYQH4YkfehY1e6QSICrJquM6xXZNcrw==", - "hasInstallScript": true, - "license": "BSD-3-Clause", - "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - }, - "bin": { - "pbjs": "bin/pbjs", - "pbts": "bin/pbts" - } - }, - "node_modules/onnxruntime-common": { - "version": "1.14.0", - "license": "MIT" - }, - "node_modules/onnxruntime-node": { - "version": "1.14.0", - "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.14.0.tgz", - "integrity": "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w==", - "license": "MIT", - "optional": true, - "os": [ - "win32", - "darwin", - "linux" - ], - "dependencies": { - "onnxruntime-common": "~1.14.0" - } - }, - "node_modules/onnxruntime-web": { - "version": "1.14.0", - "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.14.0.tgz", - "integrity": "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw==", - "license": "MIT", - "dependencies": { - "flatbuffers": "^1.12.0", - "guid-typescript": "^1.0.9", - "long": "^4.0.0", - "onnx-proto": "^4.0.4", - "onnxruntime-common": "~1.14.0", - "platform": "^1.3.6" - } - }, - "node_modules/platform": { - "version": "1.3.6", - "license": "MIT" - }, - "node_modules/prebuild-install": { - "version": "7.1.2", - "license": "MIT", - "dependencies": { - "detect-libc": "^2.0.0", - "expand-template": "^2.0.3", - "github-from-package": "0.0.0", - "minimist": "^1.2.3", - "mkdirp-classic": "^0.5.3", - "napi-build-utils": "^1.0.1", - "node-abi": "^3.3.0", - "pump": "^3.0.0", - "rc": "^1.2.7", - "simple-get": "^4.0.0", - "tar-fs": "^2.0.0", - "tunnel-agent": "^0.6.0" - }, - "bin": { - "prebuild-install": "bin.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/pump": { - "version": "3.0.0", - "license": "MIT", - "dependencies": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, - "node_modules/queue-tick": { - "version": "1.0.1", - "license": "MIT" - }, - "node_modules/rc": { - "version": "1.2.8", - "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", - "dependencies": { - "deep-extend": "^0.6.0", - "ini": "~1.3.0", - "minimist": "^1.2.0", - "strip-json-comments": "~2.0.1" - }, - "bin": { - "rc": "cli.js" - } - }, - "node_modules/readable-stream": { - "version": "3.6.2", - "license": "MIT", - "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/safe-buffer": { - "version": "5.2.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/semver": { - "version": "7.6.3", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/sharp": { - "version": "0.32.6", - "hasInstallScript": true, - "license": "Apache-2.0", - "dependencies": { - "color": "^4.2.3", - "detect-libc": "^2.0.2", - "node-addon-api": "^6.1.0", - "prebuild-install": "^7.1.1", - "semver": "^7.5.4", - "simple-get": "^4.0.1", - "tar-fs": "^3.0.4", - "tunnel-agent": "^0.6.0" - }, - "engines": { - "node": ">=14.15.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/sharp/node_modules/tar-fs": { - "version": "3.0.6", - "license": "MIT", - "dependencies": { - "pump": "^3.0.0", - "tar-stream": "^3.1.5" - }, - "optionalDependencies": { - "bare-fs": "^2.1.1", - "bare-path": "^2.1.0" - } - }, - "node_modules/sharp/node_modules/tar-stream": { - "version": "3.1.7", - "license": "MIT", - "dependencies": { - "b4a": "^1.6.4", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" - } - }, - "node_modules/simple-concat": { - "version": "1.0.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/simple-get": { - "version": "4.0.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "decompress-response": "^6.0.0", - "once": "^1.3.1", - "simple-concat": "^1.0.0" - } - }, - "node_modules/simple-swizzle": { - "version": "0.2.2", - "license": "MIT", - "dependencies": { - "is-arrayish": "^0.3.1" - } - }, - "node_modules/sqlite-vec": { - "version": "0.0.1-alpha.37", - "resolved": "https://registry.npmjs.org/sqlite-vec/-/sqlite-vec-0.0.1-alpha.37.tgz", - "integrity": "sha512-ZbdIGPLXVr7cxy9pjwxT8MYBd342bLVbLdV3ZiZz0FpdT5yi0lXyYSh/BEtt0FNrRTwXSLDLL3saTKih6KYzaQ==", - "license": "MIT OR Apache", - "optionalDependencies": { - "sqlite-vec-darwin-arm64": "0.0.1-alpha.37", - "sqlite-vec-darwin-x64": "0.0.1-alpha.37", - "sqlite-vec-linux-x64": "0.0.1-alpha.37", - "sqlite-vec-windows-x64": "0.0.1-alpha.37" - } - }, - "node_modules/sqlite-vec-darwin-arm64": { - "version": "0.0.1-alpha.37", - "resolved": "https://registry.npmjs.org/sqlite-vec-darwin-arm64/-/sqlite-vec-darwin-arm64-0.0.1-alpha.37.tgz", - "integrity": "sha512-3cPdW8JbNVZ08bwsCMsAVv0atZsvRY6Co3bgbiOVjwduBYNd3cTCC6Q+ICjz+H4hTEMZNqGdIWEeCTLgbvnEPw==", - "cpu": [ - "arm64" - ], - "license": "MIT OR Apache", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/sqlite-vec-darwin-x64": { - "version": "0.0.1-alpha.37", - "resolved": "https://registry.npmjs.org/sqlite-vec-darwin-x64/-/sqlite-vec-darwin-x64-0.0.1-alpha.37.tgz", - "integrity": "sha512-/56xiUbONFw+g3x/UlzGP2ykMDkrQ10trZVxk2Mmshd1y1QOqCoJoWzzcFm3OdaHI/Gz9vOJdCA8Py1QAr7Xqg==", - "cpu": [ - "x64" - ], - "license": "MIT OR Apache", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/sqlite-vec-linux-x64": { - "version": "0.0.1-alpha.37", - "resolved": "https://registry.npmjs.org/sqlite-vec-linux-x64/-/sqlite-vec-linux-x64-0.0.1-alpha.37.tgz", - "integrity": "sha512-BEyesm7Vo4EzE+ZQR1CY4M+lHNQufG26q1rkT5LKv6bIyPpebd+zs+hC0ndJmiNjLDNqqsbCsLQoTfLpGG2Urg==", - "cpu": [ - "x64" - ], - "license": "MIT OR Apache", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/sqlite-vec-windows-x64": { - "version": "0.0.1-alpha.37", - "resolved": "https://registry.npmjs.org/sqlite-vec-windows-x64/-/sqlite-vec-windows-x64-0.0.1-alpha.37.tgz", - "integrity": "sha512-jPSrUQNuFXy0Y6weSGS3ivU6pZp9nrvHHP3rseXFHuVj+pYK7gN6h1bC20J9Ch/BVz+9S2cVouoO/HvV90+E8w==", - "cpu": [ - "x64" - ], - "license": "MIT OR Apache", - "optional": true, - "os": [ - "windows" - ] - }, - "node_modules/streamx": { - "version": "2.18.0", - "license": "MIT", - "dependencies": { - "fast-fifo": "^1.3.2", - "queue-tick": "^1.0.1", - "text-decoder": "^1.1.0" - }, - "optionalDependencies": { - "bare-events": "^2.2.0" - } - }, - "node_modules/string_decoder": { - "version": "1.3.0", - "license": "MIT", - "dependencies": { - "safe-buffer": "~5.2.0" - } - }, - "node_modules/strip-json-comments": { - "version": "2.0.1", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/tar-fs": { - "version": "2.1.1", - "license": "MIT", - "dependencies": { - "chownr": "^1.1.1", - "mkdirp-classic": "^0.5.2", - "pump": "^3.0.0", - "tar-stream": "^2.1.4" - } - }, - "node_modules/tar-stream": { - "version": "2.2.0", - "license": "MIT", - "dependencies": { - "bl": "^4.0.3", - "end-of-stream": "^1.4.1", - "fs-constants": "^1.0.0", - "inherits": "^2.0.3", - "readable-stream": "^3.1.1" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/text-decoder": { - "version": "1.1.1", - "license": "Apache-2.0", - "dependencies": { - "b4a": "^1.6.4" - } - }, - "node_modules/tunnel-agent": { - "version": "0.6.0", - "license": "Apache-2.0", - "dependencies": { - "safe-buffer": "^5.0.1" - }, - "engines": { - "node": "*" - } - }, - "node_modules/undici-types": { - "version": "5.26.5", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "license": "MIT" - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "license": "MIT" - }, - "node_modules/wrappy": { - "version": "1.0.2", - "license": "ISC" - } - } -} diff --git a/examples/nbc-headlines/package.json b/examples/nbc-headlines/package.json deleted file mode 100644 index 6931ea6..0000000 --- a/examples/nbc-headlines/package.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "nbc-headlines", - "version": "1.0.0", - "main": "demo.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "keywords": [], - "author": "", - "license": "ISC", - "description": "", - "dependencies": { - "@xenova/transformers": "^2.17.2", - "better-sqlite3": "^11.1.2", - "sqlite-vec": "^0.0.1-alpha.37" - } -} diff --git a/examples/nbc-headlines/query.sql b/examples/nbc-headlines/query.sql new file mode 100644 index 0000000..6ac98a4 --- /dev/null +++ b/examples/nbc-headlines/query.sql @@ -0,0 +1,57 @@ +.load ../../dist/vec0 +.load ./rembed0 +insert into rembed_clients(name, options) + values ('snowflake-arctic-embed-m-v1.5', 'llamafile'); + +.bail on +.mode box +.header on +.timer on + +.param set :query 'death row' +.param set :weight_fts 1.0 +.param set :weight_vec 1.0 +.param set :rrf_k 60 +.param set :query_embedding "vec_normalize(vec_slice(rembed('snowflake-arctic-embed-m-v1.5', :query), 0, 256))" +.param set :k 10 + +select 'Hybrid w/ RRF' as ""; + +with vec_matches as ( + select + article_id, + row_number() over (order by distance) as rank_number, + distance + from vec_headlines + where + headline_embedding match :query_embedding + and k = :k + order by distance +), +fts_matches as ( + select + rowid, + --highlight(fts_headlines, 0, '', '') as headline_highlighted, + row_number() over (order by rank) as rank_number, + rank as score + from fts_headlines + where headline match :query + limit :k +), +final as ( + select + articles.id, + articles.headline, + vec_matches.distance as vector_distance, + fts_matches.score as fts_score, + coalesce(1.0 / (:rrf_k + fts_matches.rowid), 0.0) * :weight_fts + + coalesce(1.0 / (:rrf_k + vec_matches.article_id), 0.0) * :weight_vec + as combined_score + + from fts_matches + full outer join vec_matches on vec_matches.article_id = fts_matches.rowid + join articles on articles.rowid = coalesce(fts_matches.rowid, vec_matches.article_id) + order by combined_score desc +) +select * from final; + diff --git a/examples/nbc-headlinesx/scrape.ipynb b/examples/nbc-headlinesx/scrape.ipynb deleted file mode 100644 index 399e88f..0000000 --- a/examples/nbc-headlinesx/scrape.ipynb +++ /dev/null @@ -1,144 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import { parseHTML } from \"npm:linkedom\";\n", - "import { Database, Statement } from \"jsr:@db/sqlite@0.11\";\n", - "import * as sqlitePath from \"npm:sqlite-path\";\n", - "import * as sqliteUrl from \"npm:sqlite-url\";\n", - "import * as sqliteRegex from \"npm:sqlite-regex\";" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "const months = [\"january\", \"february\", \"march\", \"april\", \"may\", \"june\", \"july\", \"august\", \"september\", \"october\", \"november\", \"december\"]\n", - "\n", - "const db = new Database(\":memory:\")\n", - "db.enableLoadExtension = true;\n", - "db.loadExtension(sqlitePath.getLoadablePath());\n", - "db.loadExtension(sqliteUrl.getLoadablePath());\n", - "db.loadExtension(sqliteRegex.getLoadablePath());\n", - "db.enableLoadExtension = false;\n", - "\n", - "db.exec(`\n", - " CREATE TABLE articles(\n", - " slug_id TEXT,\n", - " slug TEXT,\n", - " headline TEXT,\n", - " url TEXT,\n", - " year integer,\n", - " month integer,\n", - " category1 TEXT,\n", - " category2 TEXT\n", - " )\n", - "`)\n", - "\n", - "const stmt = db.prepare(`\n", - " insert into articles\n", - " select\n", - " regex_capture(\n", - " '(?P.+)-(?Prcna\\\\d+)',\n", - " path_at(url_path(:url), -1),\n", - " 'id'\n", - " ) as id,\n", - " regex_capture(\n", - " '(?P.+)-(?Prcna\\\\d+)',\n", - " path_at(url_path(:url), -1),\n", - " 'slug'\n", - " ) as slug,\n", - " :headline as headline,\n", - " :url as url,\n", - " :year as year,\n", - " :month as month,\n", - " path_at(url_path(:url), 0) as category1,\n", - " iif(\n", - " path_length(url_path(:url)) > 2,\n", - " path_at(url_path(:url), 1),\n", - " null\n", - " ) as category2\n", - "`);\n", - "\n", - "const insertArticles = db.transaction((year, month, articles) => {\n", - " for(const article of articles) {\n", - " stmt.run({...article, year, month})\n", - " }\n", - "})\n", - "\n", - "async function insertMonth(year:number, month: text) {\n", - " const monthPage = await fetch(`https://www.nbcnews.com/archive/articles/${year}/${month}`).then(r=>r.text())\n", - " const {document:monthPageDoc} = parseHTML(monthPage);\n", - " const monthEntries = monthPageDoc\n", - " .querySelectorAll('.MonthPage a')\n", - " .map(a => ({headline: a.innerText, url: a.getAttribute('href')}));\n", - " insertArticles(year, months.findIndex(m => m === month)+1, monthEntries)\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "for(let year = 2014; year <= 2023; year++) {\n", - " for(const month of months) {\n", - " console.log(year, month);\n", - " await insertMonth(year, month);\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "db.exec(\"vacuum into 'articles.db'\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "db.sql`select * from articles order by random() limit 10`" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Deno", - "language": "typescript", - "name": "deno" - }, - "language_info": { - "codemirror_mode": "typescript", - "file_extension": ".ts", - "mimetype": "text/x.typescript", - "name": "typescript", - "nbconvert_exporter": "script", - "pygments_lexer": "typescript", - "version": "5.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}