mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 16:56:27 +02:00
Add ANN search support for vec0 virtual table
Add approximate nearest neighbor infrastructure to vec0: shared distance dispatch (vec0_distance_full), flat index type with parser, NEON-optimized cosine/Hamming for float32/int8, amalgamation script, and benchmark suite (benchmarks-ann/) with ground-truth generation and profiling tools. Remove unused vec_npy_each/vec_static_blobs code, fix missing stdint.h include.
This commit is contained in:
parent
dfd8dc5290
commit
bf2455f2ba
27 changed files with 2177 additions and 2116 deletions
119
scripts/amalgamate.py
Normal file
119
scripts/amalgamate.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Amalgamate sqlite-vec into a single distributable .c file.
|
||||
|
||||
Reads the dev sqlite-vec.c and inlines any #include "sqlite-vec-*.c" files,
|
||||
stripping LSP-support blocks and per-file include guards.
|
||||
|
||||
Usage:
|
||||
python3 scripts/amalgamate.py sqlite-vec.c > dist/sqlite-vec.c
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def strip_lsp_block(content):
|
||||
"""Remove the LSP-support pattern:
|
||||
#ifndef SQLITE_VEC_H
|
||||
#include "sqlite-vec.c" // ...
|
||||
#endif
|
||||
"""
|
||||
pattern = re.compile(
|
||||
r'^\s*#ifndef\s+SQLITE_VEC_H\s*\n'
|
||||
r'\s*#include\s+"sqlite-vec\.c"[^\n]*\n'
|
||||
r'\s*#endif[^\n]*\n',
|
||||
re.MULTILINE,
|
||||
)
|
||||
return pattern.sub('', content)
|
||||
|
||||
|
||||
def strip_include_guard(content, guard_macro):
|
||||
"""Remove the include guard pair:
|
||||
#ifndef GUARD_MACRO
|
||||
#define GUARD_MACRO
|
||||
...content...
|
||||
(trailing #endif removed)
|
||||
"""
|
||||
# Strip the #ifndef / #define pair at the top
|
||||
header_pattern = re.compile(
|
||||
r'^\s*#ifndef\s+' + re.escape(guard_macro) + r'\s*\n'
|
||||
r'\s*#define\s+' + re.escape(guard_macro) + r'\s*\n',
|
||||
re.MULTILINE,
|
||||
)
|
||||
content = header_pattern.sub('', content, count=1)
|
||||
|
||||
# Strip the trailing #endif (last one in file that closes the guard)
|
||||
# Find the last #endif and remove it
|
||||
lines = content.rstrip('\n').split('\n')
|
||||
for i in range(len(lines) - 1, -1, -1):
|
||||
if re.match(r'^\s*#endif', lines[i]):
|
||||
lines.pop(i)
|
||||
break
|
||||
|
||||
return '\n'.join(lines) + '\n'
|
||||
|
||||
|
||||
def detect_include_guard(content):
|
||||
"""Detect an include guard macro like SQLITE_VEC_IVF_C."""
|
||||
m = re.match(
|
||||
r'\s*(?:/\*[\s\S]*?\*/\s*)?' # optional block comment
|
||||
r'#ifndef\s+(SQLITE_VEC_\w+_C)\s*\n'
|
||||
r'#define\s+\1',
|
||||
content,
|
||||
)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def inline_include(match, base_dir):
|
||||
"""Replace an #include "sqlite-vec-*.c" with the file's contents."""
|
||||
filename = match.group(1)
|
||||
filepath = os.path.join(base_dir, filename)
|
||||
|
||||
if not os.path.exists(filepath):
|
||||
print(f"Warning: {filepath} not found, leaving #include in place", file=sys.stderr)
|
||||
return match.group(0)
|
||||
|
||||
with open(filepath, 'r') as f:
|
||||
content = f.read()
|
||||
|
||||
# Strip LSP-support block
|
||||
content = strip_lsp_block(content)
|
||||
|
||||
# Strip include guard if present
|
||||
guard = detect_include_guard(content)
|
||||
if guard:
|
||||
content = strip_include_guard(content, guard)
|
||||
|
||||
separator = '/' * 78
|
||||
header = f'\n{separator}\n// Begin inlined: {filename}\n{separator}\n\n'
|
||||
footer = f'\n{separator}\n// End inlined: {filename}\n{separator}\n'
|
||||
|
||||
return header + content.strip('\n') + footer
|
||||
|
||||
|
||||
def amalgamate(input_path):
|
||||
base_dir = os.path.dirname(os.path.abspath(input_path))
|
||||
|
||||
with open(input_path, 'r') as f:
|
||||
content = f.read()
|
||||
|
||||
# Replace #include "sqlite-vec-*.c" with inlined contents
|
||||
include_pattern = re.compile(r'^#include\s+"(sqlite-vec-[^"]+\.c)"\s*$', re.MULTILINE)
|
||||
content = include_pattern.sub(lambda m: inline_include(m, base_dir), content)
|
||||
|
||||
return content
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 2:
|
||||
print(f"Usage: {sys.argv[0]} <input-file>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
result = amalgamate(sys.argv[1])
|
||||
sys.stdout.write(result)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue