mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Always Use Neon for L2 f32 (#20)
* Always use NEON for vectors larger than 16 elements
* Remove commented-out code
This commit is contained in:
parent
66bc7e8947
commit
4cb924aaad
1 changed file with 11 additions and 3 deletions
14
sqlite-vec.c
14
sqlite-vec.c
|
|
@ -186,8 +186,16 @@ static f32 l2_sqr_float_neon(const void *pVect1v, const void *pVect2v,
|
||||||
sum3 = vfmaq_f32(sum3, diff, diff);
|
sum3 = vfmaq_f32(sum3, diff, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
return sqrt(
|
f32 sum_scalar = vaddvq_f32(vaddq_f32(vaddq_f32(sum0, sum1), vaddq_f32(sum2, sum3)));
|
||||||
vaddvq_f32(vaddq_f32(vaddq_f32(sum0, sum1), vaddq_f32(sum2, sum3))));
|
const f32 *pEnd2 = pVect1 + (qty - (qty16 << 4));
|
||||||
|
while (pVect1 < pEnd2) {
|
||||||
|
f32 diff = *pVect1 - *pVect2;
|
||||||
|
sum_scalar += diff * diff;
|
||||||
|
pVect1++;
|
||||||
|
pVect2++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sqrt(sum_scalar);
|
||||||
}
|
}
|
||||||
|
|
||||||
static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
|
static f32 l2_sqr_int8_neon(const void *pVect1v, const void *pVect2v,
|
||||||
|
|
@ -263,7 +271,7 @@ static f32 l2_sqr_int8(const void *pA, const void *pB, const void *pD) {
|
||||||
|
|
||||||
static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
|
static f32 distance_l2_sqr_float(const void *a, const void *b, const void *d) {
|
||||||
#ifdef SQLITE_VEC_ENABLE_NEON
|
#ifdef SQLITE_VEC_ENABLE_NEON
|
||||||
if (((*(const size_t *)d) % 16 == 0)) {
|
if ((*(const size_t *)d) > 16) {
|
||||||
return l2_sqr_float_neon(a, b, d);
|
return l2_sqr_float_neon(a, b, d);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue