From f43ae7a7419e2c89af6a1c8623c08c26bb922137 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Wed, 2 Oct 2024 10:24:49 -0700 Subject: [PATCH] nbc headlines updates --- examples/nbc-headlines/1_scrape.ipynb | 22 + examples/nbc-headlines/2_build.ipynb | 917 +++++++++++++-------- examples/nbc-headlines/3_search.ipynb | 1055 ++++++++++++++++--------- 3 files changed, 1293 insertions(+), 701 deletions(-) diff --git a/examples/nbc-headlines/1_scrape.ipynb b/examples/nbc-headlines/1_scrape.ipynb index cb012a1..69a44cd 100644 --- a/examples/nbc-headlines/1_scrape.ipynb +++ b/examples/nbc-headlines/1_scrape.ipynb @@ -1,5 +1,27 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NBC News Headlines: Scraper\n", + "\n", + "This notebook implements a scraper for [NBC News](https://www.nbcnews.com) headlines. It uses [this sitemap](https://www.nbcnews.com/archive/articles/2024/march), which provides a list of article headlines + URLs\n", + "for every month for the past few years. \n", + "\n", + "This dataset is mostly to get a simple, real-world small text dataset for testing embeddings. \n", + "They're small pieces of text (~dozen words), have a wide range of semantic meaning, and are more \"real-world\"\n", + "than some other embeddings datasets out there.\n", + "\n", + "This notebook uses [Deno](https://deno.com/), [linkedom](https://github.com/WebReflection/linkedom), and a few \n", + "SQLite extensions to scrape the headlines for a given date range. It creates a single SQL table, `articles`, \n", + "with a few columns like `headline` and `url`. By default it will get all article headlines from January 2024 -> present\n", + "and save them to a database called `headlines-2024.db`. Feel free to copy+paste this code into your own custom scraper. \n", + "\n", + "This notebook also just scrapes the data into a SQLite database, it does NOT do any embeddings + vector search. 
\n", + "For examples of those, see [`./2_build.ipynb`](./2_build.ipynb) and [`./3_search.ipynb`](./3_search.ipynb)." + ] + }, { "cell_type": "code", "execution_count": 43, diff --git a/examples/nbc-headlines/2_build.ipynb b/examples/nbc-headlines/2_build.ipynb index 7fe79fb..44b3ac5 100644 --- a/examples/nbc-headlines/2_build.ipynb +++ b/examples/nbc-headlines/2_build.ipynb @@ -1,9 +1,31 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 3, + "cell_type": "markdown", "metadata": {}, + "source": [ + "# NBC News Headlines: Building FTS5 + `vec0` indexes\n", + "\n", + "Using the dataset built in [the previous `./1_scrape.ipynb` notebook](./1_scrape.ipynb), \n", + "this notebook will enrich that dataset with a full-text search index and a semantic search index,\n", + "using [FTS5](https://www.sqlite.org/fts5.html), \n", + "[`sqlite-vec`](https://github.com/asg017/sqlite-vec), and \n", + "[`sqlite-lembed`](https://github.com/asg017/sqlite-lembed).\n", + "\n", + "This example will use pure SQL for everything. You can do the same exact thing in Python/JavaScript/Go/Rust/etc., or use\n", + "your own embeddings providers like Ollama/llamafile/OpenAI/etc. The core mechanics of FTS5 and `sqlite-vec` will remain the same. \n", + "\n", + "We will use the [Snowflake Arctic Embed v1.5](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5) embeddings model to generate embeddings. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -11,160 +33,33 @@ "[no code]" ] }, - "execution_count": 3, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ - ".open tmp3.db" + ".open tmp-artic2.db" ] }, { - "cell_type": "code", - "execution_count": 2, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "schema\n", - "\n", - "name\n", - "\n", - "type\n", - "\n", - "ncol\n", - "\n", - "wr\n", - "\n", - "strict\n", - "
\n", - "main\n", - "\n", - "sqlite_sequence\n", - "\n", - "table\n", - "\n", - "2\n", - "\n", - "0\n", - "\n", - "0\n", - "
\n", - "main\n", - "\n", - "articles\n", - "\n", - "table\n", - "\n", - "9\n", - "\n", - "0\n", - "\n", - "0\n", - "
\n", - "main\n", - "\n", - "sqlite_schema\n", - "\n", - "table\n", - "\n", - "5\n", - "\n", - "0\n", - "\n", - "0\n", - "
\n", - "temp\n", - "\n", - "sqlite_temp_schema\n", - "\n", - "table\n", - "\n", - "5\n", - "\n", - "0\n", - "\n", - "0\n", - "
\n", - "
\n", - "4 rows × 6 columns\n", - "
\n", - "
\n" - ], - "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mschema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mname \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtype \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mncol\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mwr\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mstrict\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_sequence \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticles \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mmain \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_schema \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtemp \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0msqlite_temp_schema\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mtable\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────\u001b[0m\u001b[0m┘\n", - "\u001b[0m\u001b[0m" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "select * from pragma_table_list;" + "# Step 1: Create a FTS5 index\n", + "\n", + "Creating a full-text search index is as simple as 3 SQL commands! 
We already have the headlines stored in the `articles` \n", + "table under the `headline` column, so it's just a matter of initializing the FTS5 virtual table and inserting the data." ] }, { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -193,22 +88,40 @@ } ], "source": [ - "create virtual table fts_headlines using fts5(\n", + "create virtual table fts_articles using fts5(\n", " headline,\n", " content='articles', content_rowid='id'\n", ");\n", "\n", - "insert into fts_headlines(rowid, headline)\n", + "insert into fts_articles(rowid, headline)\n", " select rowid, headline\n", " from articles;\n", "\n", - "insert into fts_headlines(fts_headlines) values('optimize');" + "insert into fts_articles(fts_articles) values('optimize');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By convention we name the FTS5 table `fts_articles`, where the `fts_` prefix says \"this virtual table is full-text search of the `articles` table\". We are only searching the `headline` column, the rest can be ignored. \n", + "\n", + "Here we are using the [\"external content tables\"](https://www.sqlite.org/fts5.html#external_content_tables)\n", + "feature in FTS5 tables, which will avoid storing the headlines a 2nd time, since they already exist in the `articles` table. \n", + "This part isn't required, but saves us a bit of storage. \n", + "\n", + "We also use the [`'optimize'`](https://www.sqlite.org/fts5.html#the_optimize_command) command\n", + " to keep things tidy. This doesn't do much on such a small dataset, but is important to remember for larger tables!" 
] }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 25, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -225,92 +138,78 @@ "\n", "\n", "\n", - "Washington state faces first outbreak of a deadly fungal infection that's on the rise in the U.S.\n", + "Kamala Harris visits Planned Parenthood clinic\n", "\n", "\n", "\n", "\n", - "Israel-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\n", - "\n", - "\n", - "\n", - "\n", - "House to vote on an expanded child tax credit bill\n", - "\n", - "\n", - "\n", - "\n", - "Travel costs, staff and ads added up before Ron DeSantis dropped out\n", - "\n", - "\n", - "\n", - "\n", - "Victims of Hamas attack in Israel and their families blame Iran in new federal lawsuit\n", - "\n", - "\n", - "\n", - "\n", - "Trump meets with Teamsters as he targets Biden support\n", - "\n", - "\n", - "\n", - "\n", - "The bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says\n", - "\n", - "\n", - "\n", - "\n", - "Machu Picchu tourism suffering after week of protests against new ticketing system\n", - "\n", - "\n", - "\n", - "\n", - "FCC moves to criminalize most AI-generated robocalls\n", - "\n", - "\n", - "\n", - "\n", - "Civil rights group says N.C. public schools are harming LGBTQ students, violating federal law\n", + "Former Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\n", "\n", "\n", "\n", "\n", "
\n", - "10 rows × 1 column\n", + "2 rows × 1 column\n", "
\n", "\n" ], "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWashington state faces first outbreak of a deadly fungal infection that's on the rise in the U.S. \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIsrael-Hamas war live updates: U.S. readies weeks of retaliatory strikes against Iran-linked targets\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHouse to vote on an expanded child tax credit bill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTravel costs, staff and ads added up before Ron DeSantis dropped out \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mVictims of Hamas attack in Israel and their families blame Iran in new federal lawsuit \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump meets with Teamsters as he targets Biden support \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mThe bipartisan border deal would not allow 5,000 illegal crossings per day, despite what Trump says \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - 
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMachu Picchu tourism suffering after week of protests against new ticketing system \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFCC moves to criminalize most AI-generated robocalls \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mCivil rights group says N.C. public schools are harming LGBTQ students, violating federal law \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m┌\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 4, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "select * from fts_headlines limit 10;" + 
"select *\n", + "from fts_articles\n", + "where headline match 'planned parenthood'\n", + "limit 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Create a \"semantic index\"\n", + "\n", + "\"Semantic index\" in this case is just a fancy way of saying \"vector store\", which we will do with a `sqlite-vec` `vec0` virtual table. \n", + "\n", + "Now, `sqlite-vec` just stores vectors, it doesn't generate embeddings for us. There are hundreds of different remote APIs or local inference runtimes you can use to generate embeddings,\n", + "but here we will use [`sqlite-lembed`](https://github.com/asg017/sqlite-lembed) to keep everything local and everything in pure SQL. \n", + "\n", + "We will need to choose an embeddings model in the [GGUF format](https://huggingface.co/docs/hub/en/gguf),\n", + "since `sqlite-lembed` uses [llama.cpp](https://github.com/ggerganov/llama.cpp) under the hood. \n", + "Here we will use [`Snowflake/snowflake-arctic-embed-m-v1.5`](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5),\n", + "where we can find a GGUF version [here](https://huggingface.co/asg017/sqlite-lembed-model-examples/tree/main/snowflake-arctic-embed-m-v1.5). \n", + "This model is small-ish (`436MB` full-sized, `118MB` at `Q8_0` quantized), and is trained on fairly recent data so it understands\n", + "recent events like \"COVID-19\" or \"Kamala Harris\". 
\n", + "\n", + "You can download a `.gguf` quantized version of this model with:\n", + "\n", + "```bash\n", + "wget https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/main/snowflake-arctic-embed-m-v1.5/snowflake-arctic-embed-m-v1.5.d70deb40.f16.gguf\n", + "```\n", + "\n", + "And we can configure `sqlite-lembed` to use this model like so:" ] }, { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -343,22 +242,65 @@ ".load ../../dist/vec0\n", "\n", "insert into lembed_models(name, model) values\n", - " ('default', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf'));" + " ('default', lembed_model_from_file('./snowflake-arctic-embed-m-v1.5.d70deb40.f16.gguf'));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's embeddings time! We can use the `lembed()` function, which takes in text and returns a vector representation of that text,\n", + "as an embeddings BLOB that we can insert directly into a `vec0` virtual table. \n", + "\n", + "We'll declare this new `vec_articles` table, using the `vec_` prefix as convention. This matches the `fts_articles` table above. \n", + "The Snowflake embedding model generates vectors with `768` dimensions, which we store as-is. \n", + "\n", + "Embedding and inserting into this vector store is as easy as a single `INSERT INTO` and `lembed()` call." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 9, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\n", - "create virtual table vec_headlines using vec0(\n", + "create virtual table vec_articles using vec0(\n", " article_id integer primary key,\n", - " headline_embedding float[384]\n", + " headline_embedding float[768]\n", ");\n", "\n", - "insert into vec_headlines(article_id, headline_embedding)\n", + "insert into vec_articles(article_id, headline_embedding)\n", "select\n", " rowid,\n", " lembed(headline)\n", @@ -366,9 +308,22 @@ ] }, { - "cell_type": "code", - "execution_count": 8, + "cell_type": "markdown", "metadata": {}, + "source": [ + "This took ~13 minutes for ~14,500 embeddings on my older 2019 Macbook, but newer computers with better CPUs will finish quicker (it took `2m20s` on my newer Mac M1 Mini). \n", + "\n", + "Once the `vec_articles` is ready, we can perform a KNN query like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -378,164 +333,504 @@ "\n", "\n", "\n", - "article_id\n", + "headline\n", "\n", "\n", - "headline_embedding\n", - "\n", - "\n", - "vec_to_json(vec_slice(headline_embedding, 0, 8))\n", + "distance\n", "\n", "\n", "\n", "\n", "\n", + "\n", + "Kamala Harris visits Planned Parenthood clinic\n", + "\n", "\n", - "1\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868]\n", + "0.492593914270401\n", "\n", "\n", "\n", + "\n", + "After Dobbs decision, more women are managing their own abortions\n", + "\n", "\n", - "2\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660]\n", + "0.5789032578468323\n", "\n", "\n", "\n", + "\n", + "Transforming Healthcare\n", + "\n", "\n", - "3\n", - "\n", - 
"\n", - "Blob<1536>\n", - "\n", - "\n", - "[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258]\n", + "0.5822411179542542\n", "\n", "\n", "\n", + "\n", + "A timeline of Trump's many, many positions on abortion\n", + "\n", "\n", - "4\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898]\n", + "0.6101462841033936\n", "\n", "\n", "\n", + "\n", + "How a network of abortion pill providers works together in the wake of new threats\n", + "\n", "\n", - "5\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776]\n", + "0.6196886897087097\n", "\n", "\n", "\n", + "\n", + "'Major hurdles': The reality check behind Biden's big abortion promise\n", + "\n", "\n", - "6\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\n", + "0.6198344826698303\n", "\n", "\n", "\n", + "\n", + "Trump's conflicting abortion stances are coming back to haunt him — and his party\n", + "\n", "\n", - "7\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644]\n", + "0.6198986768722534\n", "\n", "\n", "\n", + "\n", + "Where abortion rights could be on the ballot this fall: From the Politics Desk\n", + "\n", "\n", - "8\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469]\n", + "0.6201764345169067\n", "\n", "\n", "\n", + "\n", + "How the Biden campaign quickly mobilized on Trump's abortion stance\n", + "\n", "\n", - "9\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123]\n", + "0.633980393409729\n", "\n", "\n", "\n", + "\n", + "Battle over abortion heats up in Arizona — and could be on the 2024 ballot\n", + "\n", 
"\n", - "10\n", - "\n", - "\n", - "Blob<1536>\n", - "\n", - "\n", - "[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112]\n", + "0.6341449022293091\n", "\n", "\n", "\n", "\n", "
\n", - "10 rows × 3 columns\n", + "10 rows × 2 columns\n", "
\n", "\n" ], "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0marticle_id\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mheadline_embedding\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec_to_json(vec_slice(headline_embedding, 0, 8)) \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.055018,-0.021632,-0.012835,0.048403,0.039037,-0.012824,-0.043627,0.031868] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.048287,0.023883,-0.004665,0.001806,0.030342,0.050691,0.050082,-0.127660] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.042424,-0.019893,0.022101,-0.030609,-0.016659,0.008453,-0.056492,0.093258] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", 
- "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.076178,-0.080511,0.034440,0.027351,0.028441,0.038463,-0.023355,0.089898] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.028183,0.091150,-0.043882,0.028064,0.010961,0.018683,0.011500,-0.015776] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[-0.061114,-0.031104,0.060050,-0.037375,0.007963,-0.049056,-0.042365,-0.021792]\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.059814,0.026079,0.061488,0.011823,0.048770,-0.035152,0.031329,-0.015644] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.095066,0.001522,-0.030417,0.091296,0.068129,-0.021405,0.008825,0.023469] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.017708,-0.086306,0.002358,0.010318,0.008864,0.025368,0.094156,-0.006123] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m Blob<1536> \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m[0.034452,0.045083,-0.000227,0.102294,0.047915,-0.012732,-0.024640,-0.043112] \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m┌\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.492593914270401\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5789032578468323\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransforming Healthcare \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5822411179542542\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6101462841033936\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow a network of abortion pill providers works together in the wake of new threats\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6196886897087097\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m'Major hurdles': The reality check behind Biden's big abortion promise \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198344826698303\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump's conflicting abortion stances are coming back to haunt him — and his party \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198986768722534\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6201764345169067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow the Biden campaign quickly mobilized on Trump's abortion 
stance \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.633980393409729\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBattle over abortion heats up in Arizona — and could be on the 2024 ballot \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6341449022293091\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 8, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "select\n", + " articles.headline,\n", + " vec_articles.distance\n", + "from vec_articles\n", + "left join articles on articles.rowid = vec_articles.article_id\n", + "where headline_embedding match lembed(\"planned parenthood\")\n", + " and k = 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Slim it down with Binary Quantization\n", + "\n", + "The vectors in the `vec_articles` table take up a lot of space. A vector with `768` dimensions takes up `768 * 4 = 3072` bytes of space each, or around `45MB` of space for these ~14,500 entries. \n", + "\n", + "That's a lot — the original text dataset was only `~4MB`!\n", + "\n", + "If you want to make the database smaller, there are a number of quantization or other methods to do so, by trading accuracy. \n", + "Here's an example of performing [binary quantization](https://alexgarcia.xyz/sqlite-vec/guides/binary-quant.html)\n", + "on this dataset, storing 768-dimensional bit-vectors instead of floating-point vectors, a `32x` size reduction, at the expense of accuracy. 
\n", + "\n", + "We'll keep the current SQLite database as-is, and instead make a copy into a new SQLite database file, and change the `vec_articles` table\n", + "to store bit-vectors instead. \n", + "\n", + "First, we'll make a copy of the current database into a new file:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vacuum into 'tmp-artic2.slim.db';" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we'll make a connection to this new file, and drop the old `vec_articles` table that contains the large `float[768]` vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attach database 'tmp-artic2.slim.db' as slim;\n", + "drop table slim.vec_articles;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can create a new `vec0` table, storing `bit[768]` vectors instead! \n", + "We can insert the original `float[768]` from the `main.vec_articles` table (original table),\n", + "calling [`vec_quantize_binary()`](https://alexgarcia.xyz/sqlite-vec/api-reference.html#vec_quantize_binary) to convert the floats to bits. " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "create virtual table slim.vec_articles using vec0(\n", + " article_id integer primary key,\n", + " headline_embedding bit[768]\n", + ");\n", + "\n", + "insert into slim.vec_articles(article_id, headline_embedding)\n", "select\n", " article_id,\n", - " headline_embedding,\n", - " vec_to_json(vec_slice(headline_embedding, 0, 8))\n", - "from vec_headlines\n", - "limit 10;" + " vec_quantize_binary(headline_embedding)\n", + "from main.vec_articles;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can `VACUUM` the new `slim` database to shrink the file, delete the `DROP`'ed pages from the older `vec0` table. " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "0 row × 0 column\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vacuum slim;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And there we have it! This file is `7.1MB`, a large reduction from the original `53MB` table. \n", + "\n", + "KNN queries are similar, only adding the `vec_quantize_binary()` function to the query vector." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "headline\n", + "\n", + "distance\n", + "
\n", + "Kamala Harris visits Planned Parenthood clinic\n", + "\n", + "139\n", + "
\n", + "How a network of abortion pill providers works together in the wake of new threats\n", + "\n", + "151\n", + "
\n", + "After Dobbs decision, more women are managing their own abortions\n", + "\n", + "153\n", + "
\n", + "A timeline of Trump's many, many positions on abortion\n", + "\n", + "156\n", + "
\n", + "Two of the country’s largest transgender rights organizations will merge\n", + "\n", + "158\n", + "
\n", + "Transforming Healthcare\n", + "\n", + "158\n", + "
\n", + "With Harris and Walz, Democrats put abortion rights at the top of the agenda\n", + "\n", + "159\n", + "
\n", + "In states with strict abortion policies, simply seeing an OB/GYN for regular care can be difficult\n", + "\n", + "160\n", + "
\n", + "Where abortion rights could be on the ballot this fall: From the Politics Desk\n", + "\n", + "161\n", + "
\n", + "Map: Where medication abortion is and isn’t legal\n", + "\n", + "162\n", + "
\n", + "
\n", + "10 rows × 2 columns\n", + "
\n", + "
\n" + ], + "text/plain": [ + "\u001b[0m┌\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 139\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow a network of abortion pill providers works together in the wake of new threats \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 151\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 153\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 156\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTwo of the country’s largest transgender rights organizations will merge \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 158\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransforming Healthcare \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 158\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWith Harris and Walz, Democrats put abortion rights at the top of the agenda \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 159\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIn states with strict abortion policies, simply seeing an OB/GYN for regular care can be difficult\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 160\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 161\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMap: Where medication abortion is and isn’t legal \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 162\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m\u001b[0m" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "select\n", + " slim.articles.headline,\n", + " slim.vec_articles.distance\n", + "from slim.vec_articles\n", + "left join slim.articles on 
slim.articles.rowid = slim.vec_articles.article_id\n", + "where headline_embedding match vec_quantize_binary(lembed(\"planned parenthood\"))\n", + " and k = 10;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You'll notice the results differ slightly from the full-sized query above. Some results are ordered differently, some are missing. \n", + "The `distance` in this binary KNN search is Hamming distance, not the default L2 distance. " ] } ], @@ -548,7 +843,7 @@ "language_info": { "file_extension": ".sql", "mimetype": "text/x.sqlite", - "name": "sql", + "name": "sqlite", "nb_converter": "script", "pygments_lexer": "sql", "version": "TODO" diff --git a/examples/nbc-headlines/3_search.ipynb b/examples/nbc-headlines/3_search.ipynb index 69c62ce..9294e94 100644 --- a/examples/nbc-headlines/3_search.ipynb +++ b/examples/nbc-headlines/3_search.ipynb @@ -1,9 +1,27 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NBC News Headlines: Exploring Hybrid FTS5 + Vector Search\n", + "\n", + "This notebook explores a few different ways one could combine FTS5 and vector search results, when querying \n", + "[FTS5](https://www.sqlite.org/fts5.html) and\n", + "[`sqlite-vec`](https://github.com/asg017/sqlite-vec) virtual tables.\n", + "\n", + "This dataset is a small list of headlines scraped from NBC News, found in the [`./1_scrape.ipynb`](./1_scrape.ipynb) notebook.\n", + "To see how the `fts_articles` and `vec_articles` tables were created, see the [`./2_build.ipynb`](./2_build.ipynb) notebook." 
+ ] + }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -26,7 +44,7 @@ "v0.1.3-alpha.2\n", "\n", "\n", - "v0.0.1-alpha.7\n", + "v0.0.1-alpha.8\n", "\n", "\n", "\n", @@ -40,7 +58,7 @@ "\u001b[0m┌\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┐\n", "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_version()\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mlembed_version()\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.1.3-alpha.2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.0.1-alpha.7 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.1.3-alpha.2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mv0.0.1-alpha.8 \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] @@ -51,8 +69,7 @@ } ], "source": [ - ".open tmp3.db\n", - ".param set foo bar\n", + ".open tmp-artic2.db\n", "\n", ".load ../../dist/vec0\n", ".load ./lembed0\n", @@ -60,7 +77,7 @@ "insert into lembed_models(name, model)\n", " values (\n", " 'default',\n", - " lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf')\n", + " lembed_model_from_file('snowflake-arctic-embed-m-v1.5.d70deb40.f16.gguf')\n", " );\n", "\n", "select vec_version(), lembed_version();" @@ -70,13 +87,19 @@ "cell_type": "markdown", "metadata": {}, 
"source": [ - "## FTS Search" + "## Full-text Search Only\n", + "\n", + "A simple FTS query on the `fts_articles` virutal table can be made like so:" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": 19, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -89,7 +112,7 @@ "rowid\n", "\n", "\n", - "headline_highlighted\n", + "headline\n", "\n", "\n", "rank\n", @@ -102,7 +125,7 @@ "4666\n", "\n", "\n", - "Kamala Harris visits <b>Planned</b> <b>Parenthood</b> clinic\n", + "Kamala Harris visits Planned Parenthood clinic\n", "\n", "\n", "-18.9139950477264\n", @@ -113,7 +136,7 @@ "6521\n", "\n", "\n", - "Former Marine sentenced to 9 years in prison for firebombing <b>Planned</b> <b>Parenthood</b> clinic\n", + "Former Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\n", "\n", "\n", "-14.807022703838651\n", @@ -127,16 +150,16 @@ "\n" ], "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrowid\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline_highlighted\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrank\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -18.9139950477264\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6521\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-14.807022703838651\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrowid\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrank\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned 
Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -18.9139950477264\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6521\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFormer Marine sentenced to 9 years in prison for firebombing Planned Parenthood clinic\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-14.807022703838651\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 2, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -146,9 +169,9 @@ "\n", "select\n", " rowid,\n", - " highlight(fts_headlines, 0, '', '') as headline_highlighted,\n", + " headline,\n", " rank\n", - "from fts_headlines\n", + "from fts_articles\n", "where headline match :query\n", "order by rank\n", "limit 10;" @@ -158,13 +181,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Vector Search" + "The `rank` column is the negative BM25 score of the query + document. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vector Search Only\n", + "\n", + "A KNN vector search can be made on the `vec_articles` virtual table like so:" ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "execution_count": 6, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -187,112 +223,112 @@ "\n", "\n", "\n", - "9475\n", + "4666\n", "\n", "\n", - "Inside a Gen Z campaign to shake up Congress\n", + "Kamala Harris visits Planned Parenthood clinic\n", "\n", "\n", - "1.0655490159988403\n", + "0.492593914270401\n", "\n", "\n", "\n", "\n", - "11236\n", + "13928\n", "\n", "\n", - "Bunnie XO and Jelly Roll announce plans to have a baby using IVF\n", + "After Dobbs decision, more women are managing their own abortions\n", "\n", "\n", - "1.071580171585083\n", + "0.5789032578468323\n", "\n", "\n", "\n", "\n", - "4110\n", + "12636\n", "\n", "\n", - "First over-the-counter birth control pill hits store shelves\n", + "Transforming Healthcare\n", "\n", "\n", - "1.0831280946731567\n", + "0.5822411179542542\n", "\n", "\n", "\n", "\n", - "1808\n", + "6979\n", "\n", "\n", - "More nonpregnant women are requesting abortion pills to have on hand\n", + "A timeline of Trump's many, many positions on abortion\n", "\n", "\n", - "1.0897283554077148\n", + "0.6101462841033936\n", "\n", "\n", "\n", "\n", - "11154\n", + "7038\n", "\n", "\n", - "Meet the anti-abortion group using white coats and research to advance its cause\n", + "How a network of abortion pill providers works together in the wake of new threats\n", "\n", "\n", - "1.089759349822998\n", + "0.6196886897087097\n", "\n", "\n", "\n", "\n", - "5980\n", + "6914\n", "\n", "\n", - "Deathbed confession leads to bodies of mother and daughter killed 24 years ago\n", + "'Major hurdles': The reality check behind Biden's big abortion promise\n", "\n", "\n", - "1.092090368270874\n", + "0.6198344826698303\n", "\n", "\n", "\n", "\n", - "3540\n", 
+ "6794\n", "\n", "\n", - "Holding photos of their deceased children, parents lobby Congress to pass online safety legislation\n", + "Trump's conflicting abortion stances are coming back to haunt him — and his party\n", "\n", "\n", - "1.1028637886047363\n", + "0.6198986768722534\n", "\n", "\n", "\n", "\n", - "4109\n", + "7381\n", "\n", "\n", - "Alabama fertility clinic at the center of IVF ruling is resuming some procedures\n", + "Where abortion rights could be on the ballot this fall: From the Politics Desk\n", "\n", "\n", - "1.103183627128601\n", + "0.6201764345169067\n", "\n", "\n", "\n", "\n", - "4889\n", + "6871\n", "\n", "\n", - "Inside the organized crime rings plaguing Ulta, T.J. Maxx, Walgreens and other retailers\n", + "How the Biden campaign quickly mobilized on Trump's abortion stance\n", "\n", "\n", - "1.1056489944458008\n", + "0.633980393409729\n", "\n", "\n", "\n", "\n", - "86\n", + "5496\n", "\n", "\n", - "Florida bars transgender people from changing the sex on their driver's licenses\n", + "Battle over abortion heats up in Arizona — and could be on the 2024 ballot\n", "\n", "\n", - "1.1073163747787476\n", + "0.6341449022293091\n", "\n", "\n", "\n", @@ -303,24 +339,24 @@ "\n" ], "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1marticle_id\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - 
"\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9475\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInside a Gen Z campaign to shake up Congress \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0655490159988403\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11236\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBunnie XO and Jelly Roll announce plans to have a baby using IVF \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.071580171585083\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4110\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFirst over-the-counter birth control pill hits store shelves \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0831280946731567\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1808\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMore nonpregnant women are requesting abortion pills to have on hand \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.0897283554077148\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11154\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMeet the anti-abortion group using white coats and research to advance its cause \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.089759349822998\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5980\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mDeathbed confession leads to bodies of mother and daughter killed 24 years ago \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.092090368270874\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3540\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHolding photos of their deceased children, parents lobby Congress to pass online safety legislation\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1028637886047363\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4109\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAlabama fertility clinic at the center of IVF ruling is resuming some procedures \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1.103183627128601\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4889\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mInside the organized crime rings plaguing Ulta, T.J. 
Maxx, Walgreens and other retailers \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1056489944458008\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 86\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFlorida bars transgender people from changing the sex on their driver's licenses \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m1.1073163747787476\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m┌\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1marticle_id\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mdistance\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4666\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris visits Planned Parenthood clinic \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m 0.492593914270401\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5789032578468323\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 12636\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTransforming Healthcare \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5822411179542542\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6979\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6101462841033936\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7038\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow a network of abortion pill providers works together in the wake of new threats\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6196886897087097\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6914\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m'Major hurdles': The reality check behind Biden's big abortion promise \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198344826698303\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6794\u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump's conflicting abortion stances are coming back to haunt him — and his party \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6198986768722534\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6201764345169067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6871\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mHow the Biden campaign quickly mobilized on Trump's abortion stance \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.633980393409729\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5496\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mBattle over abortion heats up in Arizona — and could be on the 2024 ballot \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.6341449022293091\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 3, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -332,8 +368,8 @@ " article_id,\n", " articles.headline,\n", " distance\n", - "from vec_headlines\n", - "left join articles on articles.rowid = vec_headlines.article_id\n", + "from 
vec_articles\n", + "left join articles on articles.rowid = vec_articles.article_id\n", "where headline_embedding match lembed(:query)\n", " and k = 10;" ] @@ -342,13 +378,30 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## keyword-first" + "The `distance` column is the L2 distance between the query vector and the headline embedding. \n", + "\n", + "The rest of this notebook explores different ways of combining these FTS5 and vector search results. \n", + "The core queries are similar, differing only in their `JOIN` or `ORDER BY` techniques." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Combination Technique #1: Keyword-first\n", + "\n", + "In many search-engine cases, you may want to display keyword matches first, and supplement the rest with vector search results. \n", + "This makes some intuitive sense — keyword matches are what users expect, but you'll want to display more results if there are only a few matching documents. 
\n" ] }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 11, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -480,82 +533,62 @@ "\n", "\n", "\n", - "11154\n", + "6989\n", "\n", "\n", - "Meet the anti-abortion group using white coats and research to advance its cause\n", + "Trump says abortion restrictions should be left to states, dodging a national ban\n", "\n", "\n", "vec\n", "\n", "\n", - "11154\n", + "6989\n", "\n", "\n", "1\n", "\n", "\n", - "0.7592407464981079\n", + "0.4930749833583832\n", "\n", "\n", "\n", "\n", - "3958\n", + "13928\n", "\n", "\n", - "Supreme Court signals it is likely to reject a challenge to abortion pill access\n", + "After Dobbs decision, more women are managing their own abortions\n", "\n", "\n", "vec\n", "\n", "\n", - "3958\n", + "13928\n", "\n", "\n", "2\n", "\n", "\n", - "0.8076152205467224\n", + "0.5120846629142761\n", "\n", "\n", "\n", "\n", - "6011\n", + "11822\n", "\n", "\n", - "Supreme Court wrestles with abortion clash over emergency room treatment for pregnant women\n", + "Iowa now bans most abortions after about 6 weeks\n", "\n", "\n", "vec\n", "\n", "\n", - "6011\n", + "11822\n", "\n", "\n", "3\n", "\n", "\n", - "0.812921941280365\n", - "\n", - "\n", - "\n", - "\n", - "3933\n", - "\n", - "\n", - "The Supreme Court puts the GOP’s abortion dilemma back in the spotlight: From the Politics Desk\n", - "\n", - "\n", - "vec\n", - "\n", - "\n", - "3933\n", - "\n", - "\n", - "4\n", - "\n", - "\n", - "0.8234137296676636\n", + "0.512569785118103\n", "\n", "\n", "\n", @@ -572,110 +605,130 @@ "7381\n", "\n", "\n", - "5\n", + "4\n", "\n", "\n", - "0.8250945210456848\n", + "0.5168291926383972\n", "\n", "\n", "\n", "\n", - "6680\n", + "14009\n", "\n", "\n", - "More young people choosing permanent sterilization after abortion restrictions\n", + "Trump signals openness to banning abortion pill\n", "\n", "\n", "vec\n", "\n", "\n", - "6680\n", + "14009\n", + "\n", + 
"\n", + "5\n", + "\n", + "\n", + "0.5288293957710266\n", + "\n", + "\n", + "\n", + "\n", + "4426\n", + "\n", + "\n", + "Medication abortions rose in year after Dobbs decision, report finds\n", + "\n", + "\n", + "vec\n", + "\n", + "\n", + "4426\n", "\n", "\n", "6\n", "\n", "\n", - "0.832757294178009\n", + "0.5305097699165344\n", "\n", "\n", "\n", "\n", - "9776\n", + "4328\n", "\n", "\n", - "States with abortion bans saw birth control prescriptions fall post-Dobbs, study finds\n", + "Trump signals support for a national 15-week abortion ban\n", "\n", "\n", "vec\n", "\n", "\n", - "9776\n", + "4328\n", "\n", "\n", "7\n", "\n", "\n", - "0.837793231010437\n", + "0.532848060131073\n", "\n", "\n", "\n", "\n", - "9187\n", + "6979\n", "\n", "\n", - "Abortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds\n", + "A timeline of Trump's many, many positions on abortion\n", "\n", "\n", "vec\n", "\n", "\n", - "9187\n", + "6979\n", "\n", "\n", "8\n", "\n", "\n", - "0.8381417989730835\n", + "0.533357560634613\n", "\n", "\n", "\n", "\n", - "2646\n", + "2092\n", "\n", "\n", - "Trump campaign scrambles over abortion ban report as Democrats seize the moment\n", + "For the first time in years, Sen. 
Graham hasn't introduced a national abortion ban\n", "\n", "\n", "vec\n", "\n", "\n", - "2646\n", + "2092\n", "\n", "\n", "9\n", "\n", "\n", - "0.8460941910743713\n", + "0.5336830615997314\n", "\n", "\n", "\n", "\n", - "9447\n", + "6794\n", "\n", "\n", - "At a clinic in Hialeah, immigrants wrestle with Florida's new abortion ban\n", + "Trump's conflicting abortion stances are coming back to haunt him — and his party\n", "\n", "\n", "vec\n", "\n", "\n", - "9447\n", + "6794\n", "\n", "\n", "10\n", "\n", "\n", - "0.8477099537849426\n", + "0.5347095131874084\n", "\n", "\n", "\n", @@ -694,55 +747,50 @@ "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mOhio GOP Senate candidates pitch federal abortion bans even after voters protected reproductive rights\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.7149595994016\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m64K women and girls became pregnant due to rape in states with abortion bans, study estimates \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0mAbortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mfts \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11154\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMeet the anti-abortion group using white coats and research to advance its cause \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11154\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.7592407464981079\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3958\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mSupreme Court signals it is likely to reject a challenge to abortion pill access \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3958\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.8076152205467224\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6011\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mSupreme Court wrestles with abortion clash over emergency room treatment for pregnant women 
\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6011\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.812921941280365\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3933\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mThe Supreme Court puts the GOP’s abortion dilemma back in the spotlight: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3933\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.8234137296676636\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.8250945210456848\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6680\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMore young people choosing permanent sterilization after abortion restrictions \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m 6680\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.832757294178009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mStates with abortion bans saw birth control prescriptions fall post-Dobbs, study finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.837793231010437\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAbortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.8381417989730835\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2646\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump campaign scrambles over abortion ban report as Democrats seize the moment \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2646\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.8460941910743713\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9447\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAt a clinic in Hialeah, immigrants wrestle with Florida's new abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9447\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.8477099537849426\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6989\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump says abortion restrictions should be left to states, dodging a national ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6989\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.4930749833583832\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5120846629142761\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + 
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11822\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIowa now bans most abortions after about 6 weeks \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 11822\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.512569785118103\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5168291926383972\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m14009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals openness to banning abortion pill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 14009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5288293957710266\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4426\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMedication abortions rose in year after Dobbs 
decision, report finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4426\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5305097699165344\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals support for a national 15-week abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.532848060131073\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6979\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mA timeline of Trump's many, many positions on abortion \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6979\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.533357560634613\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2092\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFor the first time in years, Sen. 
Graham hasn't introduced a national abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2092\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5336830615997314\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6794\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump's conflicting abortion stances are coming back to haunt him — and his party \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mvec \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6794\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.5347095131874084\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 15, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ ".param set query abortion bans\n", - "\n", - "\n", - ".param set weight_fts 1.0\n", - ".param set weight_vec 1.0\n", - ".param set rrf_k 60\n", ".param set k 10\n", "\n", "\n", - "with vec_matches as (\n", - " select\n", - " article_id,\n", - " row_number() over (order by distance) as rank_number,\n", - " distance as score\n", - " from vec_headlines\n", - " where\n", - " 
headline_embedding match lembed(:query)\n", - " and k = :k\n", - " order by distance\n", - "),\n", - "fts_matches as (\n", + "with fts_matches as (\n", " select\n", " rowid as article_id,\n", " row_number() over (order by rank) as rank_number,\n", " rank as score\n", - " from fts_headlines\n", + " from fts_articles\n", " where headline match :query\n", " limit :k\n", "),\n", + "vec_matches as (\n", + " select\n", + " article_id,\n", + " row_number() over (order by distance) as rank_number,\n", + " distance as score\n", + " from vec_articles\n", + " where\n", + " headline_embedding match lembed(:query)\n", + " and k = :k\n", + " order by distance\n", + "),\n", "combined as (\n", " select 'fts' as match_type, * from fts_matches\n", " union all\n", @@ -764,33 +812,32 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## FTS + Vector search: RFF" + "We do this with a verbose CTE: one step for the FTS5 query, another for the vector search, one to \"combine\" the results with a `UNION ALL`, and one last one to `LEFT JOIN` back to the base `articles` table to get the headline.\n", + "\n", + "Here we have 5 FTS results and 10 additional vector results. This seems pretty natural, a fallback to vector search when keyword matches fall short.\n", + "\n", + "One note: this example doesn't do any de-duplication, so you may get the same results twice. So you may want to add a `DISTINCT` or `GROUP BY` somewhere to handle that. 
" ] }, { - "cell_type": "code", - "execution_count": 4, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[no code]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - ".param set name alex" + "## Combination Technique #2: Reciprocal Rank Fusion (RRF)\n", + "\n", + "[Reciprocal Rank Fusion](https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking) \n", + "is another combination technique, where matches that are both FTS matches and vector matches\n", + "are ranked higher than other. The CTE logic is a bit more involved, but can still be represented in a few steps:\n" ] }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, + "execution_count": 14, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -825,114 +872,91 @@ "\n", "\n", "\n", - "9776\n", + "4328\n", "\n", "\n", - "States with abortion bans saw birth control prescriptions fall post-Dobbs, study finds\n", - "\n", - "\n", - "7\n", + "Trump signals support for a national 15-week abortion ban\n", "\n", "\n", "2\n", "\n", "\n", - "0.031054405392392875\n", + "3\n", "\n", "\n", - "0.837793231010437\n", + "0.03200204813108039\n", "\n", "\n", - "-10.016316725971112\n", + "0.5334203839302063\n", + "\n", + "\n", + "-9.841645168493953\n", "\n", "\n", "\n", "\n", - "9187\n", + "5769\n", "\n", "\n", - "Abortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds\n", + "Mitch McConnell shies away from supporting national abortion ban\n", "\n", "\n", "8\n", "\n", "\n", - "5\n", - "\n", - "\n", - "0.030090497737556562\n", - "\n", - "\n", - "0.8381417989730835\n", - "\n", - "\n", - "-9.163558569425538\n", - "\n", - "\n", - "\n", - "\n", - "10098\n", - "\n", - "\n", - "Kamala Harris says abortion bans are creating 'a health care crisis'\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "1\n", - "\n", - "\n", - "0.01639344262295082\n", - 
"\n", - "\n", - "\n", - "\n", - "\n", - "-10.678829270936067\n", - "\n", - "\n", - "\n", - "\n", - "11154\n", - "\n", - "\n", - "Meet the anti-abortion group using white coats and research to advance its cause\n", - "\n", - "\n", - "1\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "0.01639344262295082\n", - "\n", - "\n", - "0.7592407464981079\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "3958\n", - "\n", - "\n", - "Supreme Court signals it is likely to reject a challenge to abortion pill access\n", - "\n", - "\n", "2\n", "\n", "\n", + "0.030834914611005692\n", + "\n", + "\n", + "0.5501425266265869\n", + "\n", + "\n", + "-10.19017787567105\n", + "\n", + "\n", + "\n", + "\n", + "9507\n", + "\n", + "\n", + "Arizona Senate passes repeal of 1864 abortion ban\n", + "\n", + "\n", "\n", "\n", "\n", - "0.016129032258064516\n", + "1\n", "\n", "\n", - "0.8076152205467224\n", + "0.01639344262295082\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-10.564302831642667\n", + "\n", + "\n", + "\n", + "\n", + "6989\n", + "\n", + "\n", + "Trump says abortion restrictions should be left to states, dodging a national ban\n", + "\n", + "\n", + "1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0.01639344262295082\n", + "\n", + "\n", + "0.5142395496368408\n", "\n", "\n", "\n", @@ -940,33 +964,10 @@ "\n", "\n", "\n", - "2292\n", + "10717\n", "\n", "\n", - "Ohio GOP Senate candidates pitch federal abortion bans even after voters protected reproductive rights\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "3\n", - "\n", - "\n", - "0.015873015873015872\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "-9.7149595994016\n", - "\n", - "\n", - "\n", - "\n", - "6011\n", - "\n", - "\n", - "Supreme Court wrestles with abortion clash over emergency room treatment for pregnant women\n", + "Supreme Court rejects bid to restrict access to abortion pill\n", "\n", "\n", "3\n", @@ -978,7 +979,7 @@ "0.015873015873015872\n", "\n", "\n", - "0.812921941280365\n", + "0.5351248383522034\n", "\n", "\n", 
"\n", @@ -986,10 +987,10 @@ "\n", "\n", "\n", - "452\n", + "5981\n", "\n", "\n", - "64K women and girls became pregnant due to rape in states with abortion bans, study estimates\n", + "Arizona state House passes bill to repeal 1864 abortion ban\n", "\n", "\n", "\n", @@ -1004,15 +1005,15 @@ "\n", "\n", "\n", - "-9.163558569425538\n", + "-9.841645168493953\n", "\n", "\n", "\n", "\n", - "3933\n", + "14009\n", "\n", "\n", - "The Supreme Court puts the GOP’s abortion dilemma back in the spotlight: From the Politics Desk\n", + "Trump signals openness to banning abortion pill\n", "\n", "\n", "4\n", @@ -1024,7 +1025,7 @@ "0.015625\n", "\n", "\n", - "0.8234137296676636\n", + "0.5364335179328918\n", "\n", "\n", "\n", @@ -1032,6 +1033,29 @@ "\n", "\n", "\n", + "6375\n", + "\n", + "\n", + "Arizona Republicans again quash effort to repeal 1864 abortion ban\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "5\n", + "\n", + "\n", + "0.015384615384615385\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-9.841645168493953\n", + "\n", + "\n", + "\n", + "\n", "7381\n", "\n", "\n", @@ -1047,7 +1071,7 @@ "0.015384615384615385\n", "\n", "\n", - "0.8250945210456848\n", + "0.5462378859519958\n", "\n", "\n", "\n", @@ -1055,10 +1079,33 @@ "\n", "\n", "\n", - "6680\n", + "9443\n", "\n", "\n", - "More young people choosing permanent sterilization after abortion restrictions\n", + "Arizona Gov. 
Katie Hobbs signs repeal of 1864 abortion ban\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "6\n", + "\n", + "\n", + "0.015151515151515152\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-9.841645168493953\n", + "\n", + "\n", + "\n", + "\n", + "13928\n", + "\n", + "\n", + "After Dobbs decision, more women are managing their own abortions\n", "\n", "\n", "6\n", @@ -1070,7 +1117,122 @@ "0.015151515151515152\n", "\n", "\n", - "0.832757294178009\n", + "0.5467031002044678\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "1821\n", + "\n", + "\n", + "Dominican women fight child marriage, teen pregancy amid total abortion ban\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "7\n", + "\n", + "\n", + "0.014925373134328358\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-9.51616557526609\n", + "\n", + "\n", + "\n", + "\n", + "2092\n", + "\n", + "\n", + "For the first time in years, Sen. Graham hasn't introduced a national abortion ban\n", + "\n", + "\n", + "7\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0.014925373134328358\n", + "\n", + "\n", + "0.5477523803710938\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "7150\n", + "\n", + "\n", + "Tennessee court weighs challenge to abortion ban’s narrow medical exception\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "8\n", + "\n", + "\n", + "0.014705882352941176\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-9.51616557526609\n", + "\n", + "\n", + "\n", + "\n", + "8690\n", + "\n", + "\n", + "Arizona Supreme Court pushes back enforcement date for 1864 abortion ban\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "9\n", + "\n", + "\n", + "0.014492753623188406\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-9.51616557526609\n", + "\n", + "\n", + "\n", + "\n", + "11822\n", + "\n", + "\n", + "Iowa now bans most abortions after about 6 weeks\n", + "\n", + "\n", + "9\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0.014492753623188406\n", + "\n", + "\n", + "0.5557170510292053\n", "\n", "\n", "\n", @@ -1084,27 +1246,27 
@@ "Trump campaign scrambles over abortion ban report as Democrats seize the moment\n", "\n", "\n", - "9\n", + "\n", + "\n", + "\n", + "10\n", + "\n", + "\n", + "0.014285714285714285\n", "\n", "\n", "\n", "\n", "\n", - "0.014492753623188406\n", - "\n", - "\n", - "0.8460941910743713\n", - "\n", - "\n", - "\n", + "-9.211525101866211\n", "\n", "\n", "\n", "\n", - "9447\n", + "5538\n", "\n", "\n", - "At a clinic in Hialeah, immigrants wrestle with Florida's new abortion ban\n", + "Map: Where medication abortion is and isn’t legal\n", "\n", "\n", "10\n", @@ -1116,7 +1278,7 @@ "0.014285714285714285\n", "\n", "\n", - "0.8477099537849426\n", + "0.5588464140892029\n", "\n", "\n", "\n", @@ -1125,63 +1287,66 @@ "\n", "\n", "
\n", - "13 rows × 7 columns\n", + "18 rows × 7 columns\n", "
\n", "\n" ], "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mid\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_rank\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_rank\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mcombined_rank\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_distance\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_score\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mStates with abortion 
bans saw birth control prescriptions fall post-Dobbs, study finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.031054405392392875\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.837793231010437\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.016316725971112\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAbortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.030090497737556562\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.8381417989730835\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m10098\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris says abortion bans are creating 'a health care crisis' \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.01639344262295082\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.678829270936067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - 
"\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11154\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMeet the anti-abortion group using white coats and research to advance its cause \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.01639344262295082\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.7592407464981079\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3958\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mSupreme Court signals it is likely to reject a challenge to abortion pill access \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.016129032258064516\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.8076152205467224\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mOhio GOP Senate candidates pitch federal abortion bans even after voters protected reproductive rights\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015873015873015872\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 
\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.7149595994016\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6011\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mSupreme Court wrestles with abortion clash over emergency room treatment for pregnant women \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015873015873015872\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.812921941280365\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m64K women and girls became pregnant due to rape in states with abortion bans, study estimates \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.015625\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3933\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mThe Supreme Court puts the GOP’s abortion dilemma back in the spotlight: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.015625\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.8234137296676636\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015384615384615385\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.8250945210456848\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6680\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMore young people choosing permanent sterilization after abortion restrictions \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015151515151515152\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.832757294178009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2646\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump campaign scrambles over abortion ban report as Democrats seize the moment \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014492753623188406\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.8460941910743713\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9447\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAt a clinic in Hialeah, immigrants wrestle with Florida's new abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014285714285714285\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.8477099537849426\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", + 
"\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mid\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_rank\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_rank\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mcombined_rank\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mvec_distance\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_score\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals support for a national 15-week abortion ban \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.03200204813108039\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5334203839302063\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5769\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMitch McConnell shies away from supporting national abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.030834914611005692\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5501425266265869\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -10.19017787567105\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9507\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Senate passes repeal of 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.01639344262295082\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.564302831642667\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6989\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0mTrump says abortion restrictions should be left to states, dodging a national ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.01639344262295082\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5142395496368408\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m10717\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mSupreme Court rejects bid to restrict access to abortion pill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015873015873015872\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5351248383522034\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5981\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona state House passes bill to repeal 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.015625\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m14009\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals openness to banning abortion pill \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 0.015625\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5364335179328918\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6375\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Republicans again quash effort to repeal 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015384615384615385\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7381\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mWhere abortion rights could be on the ballot this fall: From the Politics Desk \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015384615384615385\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5462378859519958\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9443\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Gov. Katie Hobbs signs repeal of 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015151515151515152\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m13928\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAfter Dobbs decision, more women are managing their own abortions \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.015151515151515152\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5467031002044678\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1821\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mDominican women fight child marriage, teen pregancy amid total abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014925373134328358\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.51616557526609\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2092\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mFor the first time in years, Sen. Graham hasn't introduced a national abortion ban\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014925373134328358\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5477523803710938\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7150\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTennessee court weighs challenge to abortion ban’s narrow medical exception \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014705882352941176\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.51616557526609\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8690\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Supreme Court pushes back enforcement date for 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m0.014492753623188406\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.51616557526609\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m11822\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mIowa now bans most abortions after about 6 weeks \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014492753623188406\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5557170510292053\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2646\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump campaign scrambles over abortion ban report as Democrats seize the moment \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014285714285714285\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.211525101866211\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMap: Where medication abortion is and isn’t legal \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.014285714285714285\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m0.5588464140892029\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m──────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - ".param set query abortion bans\n", + ".param set query abortion ban\n", "\n", "\n", + ".param set k 10\n", + ".param set rrf_k 60\n", ".param set weight_fts 1.0\n", ".param set weight_vec 1.0\n", - ".param set rrf_k 60\n", - ".param set k 10\n", - "\n", "\n", "with vec_matches as (\n", " select\n", " article_id,\n", " row_number() over (order by distance) as rank_number,\n", " distance\n", - " from vec_headlines\n", + " from vec_articles\n", " where\n", " headline_embedding match lembed(:query)\n", " and k = :k\n", - " order by distance\n", "),\n", "fts_matches as (\n", " select\n", " rowid,\n", " row_number() over (order by rank) as rank_number,\n", " rank as score\n", - " from fts_headlines\n", + " from fts_articles\n", " where headline match :query\n", " limit :k\n", "),\n", @@ -1209,13 +1374,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Re-rank by semantics" + "The first two CTE steps are identical to the \"keyword-first\" approach, just a normal FTS5 + vector KNN queries. 
\n", + "\n", + "The combination CTE step is more involved, and is described in detail in [this \"Hybrid Search\" Supabase docs page](https://supabase.com/docs/guides/ai/hybrid-search). \n", + "What's nice about this approach is that you can configure the \"weights\" of FTS or vector results with a normal SQL parameter. \n", + "\n", + "In this query, we can see the top result `\"Trump signals support for a national 15-week abortion ban\"` was neither the top vector result nor the top FTS result — it only ranked `2` and `3` respectively. \n", + "But since it appeared in both the FTS and vector results, it's ranked higher than the others; the same goes for `\"Mitch McConnell shies away from supporting national abortion ban\"`. The rest of the results are\n", + "FTS + vector results interwoven together, pretty nice!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combination Technique #3: Re-rank by semantics\n", + "\n", + "Here we use the FTS5 results as the \"source of truth\", but we re-order them based on their semantic similarity to the query." ] }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, + "execution_count": 18, + "metadata": { + "vscode": { + "languageId": "sql" + } + }, "outputs": [ { "data": { @@ -1244,116 +1429,206 @@ "\n", "\n", "\n", - "452\n", + "4328\n", "\n", "\n", - "64K women and girls became pregnant due to rape in states with abortion bans, study estimates\n", + "Trump signals support for a national 15-week abortion ban\n", "\n", "\n", - "452\n", - "\n", - "\n", - "4\n", - "\n", - "\n", - "-9.163558569425538\n", - "\n", - "\n", - "\n", - "\n", - "9776\n", - "\n", - "\n", - "States with abortion bans saw birth control prescriptions fall post-Dobbs, study finds\n", - "\n", - 
"\n", - "2292\n", + "4328\n", "\n", "\n", "3\n", "\n", "\n", - "-9.7149595994016\n", + "-9.841645168493953\n", "\n", "\n", "\n", "\n", - "10098\n", + "5769\n", "\n", "\n", - "Kamala Harris says abortion bans are creating 'a health care crisis'\n", + "Mitch McConnell shies away from supporting national abortion ban\n", "\n", "\n", - "10098\n", + "5769\n", "\n", "\n", - "1\n", + "2\n", "\n", "\n", - "-10.678829270936067\n", + "-10.19017787567105\n", "\n", "\n", "\n", "\n", - "9187\n", + "2646\n", "\n", "\n", - "Abortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds\n", + "Trump campaign scrambles over abortion ban report as Democrats seize the moment\n", "\n", "\n", - "9187\n", + "2646\n", + "\n", + "\n", + "10\n", + "\n", + "\n", + "-9.211525101866211\n", + "\n", + "\n", + "\n", + "\n", + "7150\n", + "\n", + "\n", + "Tennessee court weighs challenge to abortion ban’s narrow medical exception\n", + "\n", + "\n", + "7150\n", + "\n", + "\n", + "8\n", + "\n", + "\n", + "-9.51616557526609\n", + "\n", + "\n", + "\n", + "\n", + "1821\n", + "\n", + "\n", + "Dominican women fight child marriage, teen pregancy amid total abortion ban\n", + "\n", + "\n", + "1821\n", + "\n", + "\n", + "7\n", + "\n", + "\n", + "-9.51616557526609\n", + "\n", + "\n", + "\n", + "\n", + "6375\n", + "\n", + "\n", + "Arizona Republicans again quash effort to repeal 1864 abortion ban\n", + "\n", + "\n", + "6375\n", "\n", "\n", "5\n", "\n", "\n", - "-9.163558569425538\n", + "-9.841645168493953\n", + "\n", + "\n", + "\n", + "\n", + "9507\n", + "\n", + "\n", + "Arizona Senate passes repeal of 1864 abortion ban\n", + "\n", + "\n", + "9507\n", + "\n", + "\n", + "1\n", + "\n", + "\n", + "-10.564302831642667\n", + "\n", + "\n", + "\n", + "\n", + "8690\n", + "\n", + "\n", + "Arizona Supreme Court pushes back enforcement date for 1864 abortion ban\n", + "\n", + "\n", + "8690\n", + "\n", + "\n", + "9\n", + "\n", + "\n", + "-9.51616557526609\n", + "\n", + "\n", + "\n", + "\n", 
+ "5981\n", + "\n", + "\n", + "Arizona state House passes bill to repeal 1864 abortion ban\n", + "\n", + "\n", + "5981\n", + "\n", + "\n", + "4\n", + "\n", + "\n", + "-9.841645168493953\n", + "\n", + "\n", + "\n", + "\n", + "9443\n", + "\n", + "\n", + "Arizona Gov. Katie Hobbs signs repeal of 1864 abortion ban\n", + "\n", + "\n", + "9443\n", + "\n", + "\n", + "6\n", + "\n", + "\n", + "-9.841645168493953\n", "\n", "\n", "\n", "\n", "
\n", - "5 rows × 5 columns\n", + "10 rows × 5 columns\n", "
\n", "\n" ], "text/plain": [ - "\u001b[0m┌\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mid\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrowid\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_rank_number\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mscore\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m64K women and girls became pregnant due to rape in states with abortion bans, study estimates \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 452\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 
9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mStates with abortion bans saw birth control prescriptions fall post-Dobbs, study finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9776\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.016316725971112\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mOhio GOP Senate candidates pitch federal abortion bans even after voters protected reproductive rights\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2292\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.7149595994016\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m10098\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mKamala Harris says abortion bans are creating 'a health care crisis' \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m10098\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.678829270936067\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mAbortion bans drive away up to half of young talent, CNBC/Generation Lab youth survey finds \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9187\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m 
\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.163558569425538\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m────────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", + "\u001b[0m┌\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────\u001b[0m\u001b[0m┬\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┐\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mid\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mheadline\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mrowid\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mfts_rank_number\u001b[0m\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m\u001b[0m\u001b[1mscore\u001b[0m \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m├\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────\u001b[0m\u001b[0m┼\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┤\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump signals support for a national 
15-week abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4328\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 3\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m5769\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mMitch McConnell shies away from supporting national abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5769\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -10.19017787567105\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m2646\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTrump campaign scrambles over abortion ban report as Democrats seize the moment\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 2646\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 10\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.211525101866211\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m7150\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mTennessee court weighs challenge to abortion ban’s narrow medical exception \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7150\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.51616557526609\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m 
\u001b[0m\u001b[0m\u001b[0m1821\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mDominican women fight child marriage, teen pregancy amid total abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1821\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 7\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.51616557526609\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m6375\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Republicans again quash effort to repeal 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6375\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m9507\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Senate passes repeal of 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9507\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 1\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m-10.564302831642667\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m8690\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Supreme Court pushes back enforcement date for 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 8690\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 
-9.51616557526609\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m5981\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona state House passes bill to repeal 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 5981\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 4\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m9443\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0mArizona Gov. Katie Hobbs signs repeal of 1864 abortion ban \u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 9443\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m 6\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m \u001b[0m\u001b[0m\u001b[0m -9.841645168493953\u001b[0m \u001b[0m\u001b[0m│\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0m\u001b[0m└\u001b[0m\u001b[0m──────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m───────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────\u001b[0m\u001b[0m┴\u001b[0m\u001b[0m─────────────────────\u001b[0m\u001b[0m┘\n", "\u001b[0m\u001b[0m" ] }, - "execution_count": 12, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - ".param set query abortion bans\n", + ".param set query abortion ban\n", ".param set k 10\n", "\n", "\n", @@ -1362,7 +1637,7 @@ " rowid,\n", " row_number() over (order by rank) as fts_rank_number,\n", " rank as score\n", - " from fts_headlines\n", + " from fts_articles\n", " where headline match :query\n", " limit :k\n", "),\n", @@ -1389,7 +1664,7 @@ "language_info": { "file_extension": ".sql", 
"mimetype": "text/x.sqlite", - "name": "sql", + "name": "sqlite", "nb_converter": "script", "pygments_lexer": "sql", "version": "TODO"