Add AVX2-optimized Hamming distance using VPSHUFB popcount

Implements distance_hamming_avx2(), which processes 32 bytes per
iteration using the standard VPSHUFB nibble-lookup popcount pattern.
It is dispatched when SQLITE_VEC_ENABLE_AVX is defined and the input
is at least 32 bytes; smaller inputs fall back to the u64 scalar or
u8 byte-at-a-time paths.

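Also adds the -mavx2 flag to the Makefile for x86-64 targets, alongside
the existing -mavx. Note that the Linux detection only checks for `avx`
in /proc/cpuinfo, so -mavx2 is not separately gated on AVX2 support.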

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-31 17:39:41 -07:00
parent d033bf5728
commit d684178a12
2 changed files with 59 additions and 2 deletions

View file

@ -37,7 +37,7 @@ endif
ifndef OMIT_SIMD
ifeq ($(shell uname -sm),Darwin x86_64)
CFLAGS += -mavx -DSQLITE_VEC_ENABLE_AVX
CFLAGS += -mavx -mavx2 -DSQLITE_VEC_ENABLE_AVX
endif
ifeq ($(shell uname -sm),Darwin arm64)
CFLAGS += -mcpu=apple-m1 -DSQLITE_VEC_ENABLE_NEON
@ -45,7 +45,7 @@ ifndef OMIT_SIMD
ifeq ($(shell uname -s),Linux)
ifeq ($(findstring android,$(CC)),)
ifneq ($(filter avx,$(shell grep -o 'avx[^ ]*' /proc/cpuinfo 2>/dev/null | head -1)),)
CFLAGS += -mavx -DSQLITE_VEC_ENABLE_AVX
CFLAGS += -mavx -mavx2 -DSQLITE_VEC_ENABLE_AVX
endif
endif
endif