sqlite-vec/scripts/amalgamate.py
Alex Garcia bf2455f2ba Add ANN search support for vec0 virtual table
Add approximate nearest neighbor infrastructure to vec0: shared distance
dispatch (vec0_distance_full), flat index type with parser, NEON-optimized
cosine/Hamming for float32/int8, amalgamation script, and benchmark suite
(benchmarks-ann/) with ground-truth generation and profiling tools. Remove
unused vec_npy_each/vec_static_blobs code, fix missing stdint.h include.
2026-03-29 19:44:44 -07:00

119 lines
3.3 KiB
Python

#!/usr/bin/env python3
"""
Amalgamate sqlite-vec into a single distributable .c file.
Reads the dev sqlite-vec.c and inlines any #include "sqlite-vec-*.c" files,
stripping LSP-support blocks and per-file include guards.
Usage:
    python3 scripts/amalgamate.py sqlite-vec.c > dist/sqlite-vec.c
"""
import re
import sys
import os
def strip_lsp_block(content):
    """Drop the editor/LSP helper block from an included source file.

    The block being removed has this three-line shape (each line may be
    indented, and the last two may carry trailing text such as a comment):

        #ifndef SQLITE_VEC_H
        #include "sqlite-vec.c"
        #endif

    Every occurrence in ``content`` is removed. Because the leading
    whitespace match also spans newlines, blank lines directly in front of
    the block are removed together with it. Text that does not contain the
    block is returned unchanged.
    """
    lsp_block = (
        r'^\s*#ifndef\s+SQLITE_VEC_H\s*\n'
        r'\s*#include\s+"sqlite-vec\.c"[^\n]*\n'
        r'\s*#endif[^\n]*\n'
    )
    return re.sub(lsp_block, '', content, flags=re.MULTILINE)
def strip_include_guard(content, guard_macro):
    """Remove the include guard that wraps a file's contents.

    The guard is expected to look like:

        #ifndef GUARD_MACRO
        #define GUARD_MACRO
        ...content...
        #endif

    Args:
        content: Full text of the source file.
        guard_macro: Name of the guard macro; it is matched literally.

    Returns:
        The text with the first ``#ifndef``/``#define`` pair for
        ``guard_macro`` and the last ``#endif`` line removed, ending in
        exactly one newline. If no such opening pair is found, ``content``
        is returned untouched so that an unrelated ``#endif`` is never
        deleted and the preprocessor conditionals stay balanced.
    """
    macro = re.escape(guard_macro)
    header_pattern = re.compile(
        r'^\s*#ifndef\s+' + macro + r'\s*\n'
        r'\s*#define\s+' + macro + r'\s*\n',
        re.MULTILINE,
    )
    content, opened = header_pattern.subn('', content, count=1)
    if not opened:
        # Nothing was opened, so there is no guard-closing #endif to remove.
        return content

    # The guard wraps the whole file, so its closing #endif is the last
    # #endif line; inner #if/#endif pairs all appear before it.
    lines = content.rstrip('\n').split('\n')
    for idx in reversed(range(len(lines))):
        if re.match(r'\s*#endif', lines[idx]):
            del lines[idx]
            break
    return '\n'.join(lines) + '\n'
def detect_include_guard(content):
    """Detect an include guard macro like SQLITE_VEC_IVF_C.

    The guard must be the first thing in ``content``, optionally preceded by
    whitespace and a single ``/* ... */`` block comment, and must consist of
    an ``#ifndef NAME`` line immediately followed by a ``#define NAME`` line
    where ``NAME`` has the form ``SQLITE_VEC_<something>_C``.

    The ``#define`` line must name exactly the same macro with nothing but
    whitespace after it. A longer macro that merely starts with ``NAME``
    (``#define NAME_IMPL``) or a define with a value (``#define NAME 1``) is
    not treated as a guard: strip_include_guard() cannot remove such a
    header, so reporting it would lead to a half-stripped guard.

    Args:
        content: Full text of the source file.

    Returns:
        The guard macro name, or ``None`` when no guard is found.
    """
    m = re.match(
        r'\s*(?:/\*[\s\S]*?\*/\s*)?'  # optional block comment
        r'#ifndef\s+(SQLITE_VEC_\w+_C)\s*\n'
        # Same macro, end of line right after it; the #define may be indented.
        r'\s*#define\s+\1\s*\n',
        content,
    )
    return m.group(1) if m else None
def inline_include(match, base_dir):
    """Expand one matched ``#include "sqlite-vec-*.c"`` line into file text.

    Args:
        match: Regex match whose group 1 is the included file name and whose
            group 0 is the whole ``#include`` line.
        base_dir: Directory the included file name is resolved against.

    Returns:
        The included file's text, with the LSP-support block and any
        detected include guard stripped, wrapped between "Begin inlined" /
        "End inlined" banner comments. If the file does not exist, a warning
        is printed to stderr and the original ``#include`` text is returned
        unchanged.
    """
    filename = match.group(1)
    filepath = os.path.join(base_dir, filename)

    if not os.path.exists(filepath):
        print(f"Warning: {filepath} not found, leaving #include in place", file=sys.stderr)
        return match.group(0)

    with open(filepath, 'r') as src:
        body = strip_lsp_block(src.read())

    # Only attempt guard removal when a guard was actually recognised.
    guard_macro = detect_include_guard(body)
    if guard_macro:
        body = strip_include_guard(body, guard_macro)

    separator = '/' * 78
    banner_open = f'\n{separator}\n// Begin inlined: {filename}\n{separator}\n\n'
    banner_close = f'\n{separator}\n// End inlined: {filename}\n{separator}\n'
    return ''.join((banner_open, body.strip('\n'), banner_close))
def amalgamate(input_path):
    """Return the text of ``input_path`` with its sqlite-vec includes inlined.

    Each line of the form ``#include "sqlite-vec-<name>.c"`` is replaced by
    the result of inline_include(), which resolves the file name relative to
    the directory containing ``input_path``. All other text is passed
    through as read.
    """
    base_dir = os.path.dirname(os.path.abspath(input_path))
    with open(input_path, 'r') as src:
        text = src.read()

    def expand(found):
        # Bind base_dir so the callback fits re.sub's one-argument signature.
        return inline_include(found, base_dir)

    return re.sub(
        r'^#include\s+"(sqlite-vec-[^"]+\.c)"\s*$',
        expand,
        text,
        flags=re.MULTILINE,
    )
def main():
    """Command-line entry point.

    Expects exactly one argument, the input file. The amalgamated text is
    written to stdout. With any other argument count, a usage message is
    printed to stderr and the process exits with status 1.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        print(f"Usage: {sys.argv[0]} <input-file>", file=sys.stderr)
        sys.exit(1)
    sys.stdout.write(amalgamate(args[0]))


if __name__ == '__main__':
    main()