mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Add comprehensive ANN benchmarking suite
Extend benchmarks-ann/ with results database (SQLite with per-query detail and continuous writes), dataset subfolder organization, --subset-size and --warmup options. Supports systematic comparison across flat, rescore, IVF, and DiskANN index types.
This commit is contained in:
parent
a248ecd061
commit
dbbb4b98f7
26 changed files with 2127 additions and 292 deletions
29
benchmarks-ann/datasets/nyt-384/Makefile
Normal file
29
benchmarks-ann/datasets/nyt-384/Makefile
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
# User-tunable settings. Each uses `?=`, so a value supplied on the command
# line or in the environment takes precedence over the default shown here.

# Passed to build-base.py as --model.
MODEL      ?= mixedbread-ai/mxbai-embed-xsmall-v1
# Passed to build-base.py as --k.
K          ?= 100
# Passed to build-base.py as --batch-size.
BATCH_SIZE ?= 512
# Passed to build-contents.py as --data-dir; when the directory is missing,
# the $(DATA_DIR) rule delegates to the `data` target of ../nyt.
DATA_DIR   ?= ../nyt/data
|
||||
|
||||
# Default goal (first rule in the file): build base.db.
all: base.db
|
||||
|
||||
# The data directory is not produced by this Makefile; building it is
# delegated to the `data` target of the Makefile in ../nyt. $(MAKE) is used
# rather than a literal `make` so that flags such as -j and -n propagate.
$(DATA_DIR):
	$(MAKE) --directory=../nyt data
|
||||
|
||||
# contents.db is written by the build-contents.py script kept under
# ../nyt-768, reading its input from $(DATA_DIR).
#
# $(DATA_DIR) is an order-only prerequisite (listed after `|`): it must exist
# before the script runs, but its timestamp is ignored. A directory's mtime
# changes whenever an entry is added or removed inside it, so listing it as
# a normal prerequisite would trigger spurious rebuilds of contents.db (and
# of base.db, which depends on it).
contents.db: | $(DATA_DIR)
	uv run ../nyt-768/build-contents.py --data-dir $(DATA_DIR) -o $@
|
||||
|
||||
# Delete a target file when its recipe exits with a non-zero status, so a
# failed or interrupted run cannot leave a partial file behind that a later
# `make` would treat as up to date. This special target applies to every
# rule in this Makefile.
.DELETE_ON_ERROR:

# base.db is built by build-base.py from contents.db and queries.txt, using
# the MODEL, BATCH_SIZE and K settings.
# NOTE(review): this script is taken from ../nyt-1024 while build-contents.py
# is taken from ../nyt-768 — confirm both locations are intended.
base.db: contents.db queries.txt
	uv run ../nyt-1024/build-base.py \
		--contents-db contents.db \
		--model $(MODEL) \
		--queries-file queries.txt \
		--batch-size $(BATCH_SIZE) \
		--k $(K) \
		-o $@
|
||||
|
||||
# Seed queries.txt from the copy kept in ../nyt. The rule declares no
# prerequisites, so the copy runs only when queries.txt does not exist yet;
# an existing file is never overwritten.
queries.txt:
	cp ../nyt/queries.txt $@
|
||||
|
||||
# `all` and `clean` name actions rather than files; declaring them phony
# keeps them working even if a file with one of those names appears.
.PHONY: all clean

# Remove the generated databases. queries.txt is deliberately not listed
# here and is left in place.
clean:
	rm -f base.db contents.db
|
||||
100
benchmarks-ann/datasets/nyt-384/queries.txt
Normal file
100
benchmarks-ann/datasets/nyt-384/queries.txt
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
latest news on climate change policy
|
||||
presidential election results and analysis
|
||||
stock market crash causes
|
||||
coronavirus vaccine development updates
|
||||
artificial intelligence breakthrough in healthcare
|
||||
supreme court ruling on abortion rights
|
||||
tech companies layoff announcements
|
||||
earthquake damages in California
|
||||
cybersecurity breach at major corporation
|
||||
space exploration mission to Mars
|
||||
immigration reform legislation debate
|
||||
renewable energy investment trends
|
||||
healthcare costs rising across America
|
||||
protests against police brutality
|
||||
wildfires destroy homes in the West
|
||||
Olympic games highlights and records
|
||||
celebrity scandal rocks Hollywood
|
||||
breakthrough cancer treatment discovered
|
||||
housing market bubble concerns
|
||||
federal reserve interest rate decision
|
||||
school shooting tragedy response
|
||||
diplomatic tensions between superpowers
|
||||
drone strike kills terrorist leader
|
||||
social media platform faces regulation
|
||||
archaeological discovery reveals ancient civilization
|
||||
unemployment rate hits record low
|
||||
autonomous vehicles testing expansion
|
||||
streaming service launches original content
|
||||
opioid crisis intervention programs
|
||||
trade war tariffs impact economy
|
||||
infrastructure bill passes Congress
|
||||
data privacy concerns grow
|
||||
minimum wage increase proposal
|
||||
college admissions scandal exposed
|
||||
NFL player protest during anthem
|
||||
cryptocurrency regulation debate
|
||||
pandemic lockdown restrictions eased
|
||||
mass shooting gun control debate
|
||||
tax reform legislation impact
|
||||
ransomware attack cripples pipeline
|
||||
climate activists stage demonstration
|
||||
sports team wins championship
|
||||
banking system collapse fears
|
||||
pharmaceutical company fraud charges
|
||||
genetic engineering ethical concerns
|
||||
border wall funding controversy
|
||||
impeachment proceedings begin
|
||||
nuclear weapons treaty violation
|
||||
artificial meat alternative launch
|
||||
student loan debt forgiveness
|
||||
venture capital funding decline
|
||||
facial recognition ban proposed
|
||||
election interference investigation
|
||||
pandemic preparedness failures
|
||||
police reform measures announced
|
||||
wildfire prevention strategies
|
||||
ocean pollution crisis worsens
|
||||
manufacturing jobs returning
|
||||
pension fund shortfall concerns
|
||||
antitrust investigation launched
|
||||
voting rights protection act
|
||||
mental health awareness campaign
|
||||
homeless population increasing
|
||||
space debris collision risk
|
||||
drug cartel violence escalates
|
||||
renewable energy jobs growth
|
||||
infrastructure deterioration report
|
||||
vaccine mandate legal challenge
|
||||
cryptocurrency market volatility
|
||||
autonomous drone delivery service
|
||||
deep fake technology dangers
|
||||
Arctic ice melting accelerates
|
||||
income inequality gap widens
|
||||
election fraud claims disputed
|
||||
corporate merger blocked
|
||||
medical breakthrough extends life
|
||||
transportation strike disrupts city
|
||||
racial justice protests spread
|
||||
carbon emissions reduction goals
|
||||
financial crisis warning signs
|
||||
cyberbullying prevention efforts
|
||||
asteroid near miss with Earth
|
||||
gene therapy approval granted
|
||||
labor union organizing drive
|
||||
surveillance technology expansion
|
||||
education funding cuts proposed
|
||||
disaster relief efforts underway
|
||||
housing affordability crisis
|
||||
clean water access shortage
|
||||
artificial intelligence job displacement
|
||||
trade agreement negotiations
|
||||
prison reform initiative launched
|
||||
species extinction accelerates
|
||||
political corruption scandal
|
||||
terrorism threat level raised
|
||||
food safety contamination outbreak
|
||||
ai model release
|
||||
affordability interest rates
|
||||
peanut allergies in newbons
|
||||
breaking bad walter white
|
||||
Loading…
Add table
Add a link
Reference in a new issue