mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 00:36:56 +02:00
Filter deleted nodes from DiskANN search results and add delete tests
DiskANN's delete repair only fixes forward edges (nodes the deleted node pointed to). Stale reverse edges can cause deleted rowids to appear in search results. Fix: track a 'confirmed' flag on each search candidate, set when the full-precision vector is successfully read during re-ranking. Only confirmed candidates are included in output. Zero additional SQL queries — piggybacks on the existing re-rank vector read. Also adds delete hardening tests: - Rescore: interleaved delete+KNN, rowid_in after deletes, full delete+reinsert cycle - DiskANN: delete+reinsert cycles with KNN verification, interleaved delete+KNN Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2f4c2e4bdb
commit
b00865429b
4 changed files with 190 additions and 8 deletions
|
|
@ -608,6 +608,7 @@ static int diskann_candidate_list_insert(
|
|||
list->items[lo].rowid = rowid;
|
||||
list->items[lo].distance = distance;
|
||||
list->items[lo].visited = 0;
|
||||
list->items[lo].confirmed = 0;
|
||||
list->count++;
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -741,8 +742,9 @@ static int diskann_search(
|
|||
return rc;
|
||||
}
|
||||
|
||||
// Seed with medoid
|
||||
// Seed with medoid (confirmed — we already read its vector above)
|
||||
diskann_candidate_list_insert(&candidates, medoid, medoidDist);
|
||||
candidates.items[0].confirmed = 1;
|
||||
|
||||
// Pre-quantize query vector once for all quantized distance comparisons
|
||||
u8 *queryQuantized = NULL;
|
||||
|
|
@ -815,16 +817,27 @@ static int diskann_search(
|
|||
sqlite3_free(fullVec);
|
||||
// Update distance in candidate list and re-sort
|
||||
diskann_candidate_list_insert(&candidates, currentRowid, exactDist);
|
||||
// Mark as confirmed (vector exists, distance is exact)
|
||||
for (int ci = 0; ci < candidates.count; ci++) {
|
||||
if (candidates.items[ci].rowid == currentRowid) {
|
||||
candidates.items[ci].confirmed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// If vector read failed, candidate stays unconfirmed (stale edge to deleted node)
|
||||
}
|
||||
|
||||
// 5. Output results (candidates are already sorted by distance)
|
||||
int resultCount = (candidates.count < k) ? candidates.count : k;
|
||||
*outCount = resultCount;
|
||||
for (int i = 0; i < resultCount; i++) {
|
||||
outRowids[i] = candidates.items[i].rowid;
|
||||
outDistances[i] = candidates.items[i].distance;
|
||||
// 5. Output results — only include confirmed candidates (whose vectors exist)
|
||||
int resultCount = 0;
|
||||
for (int i = 0; i < candidates.count && resultCount < k; i++) {
|
||||
if (candidates.items[i].confirmed) {
|
||||
outRowids[resultCount] = candidates.items[i].rowid;
|
||||
outDistances[resultCount] = candidates.items[i].distance;
|
||||
resultCount++;
|
||||
}
|
||||
}
|
||||
*outCount = resultCount;
|
||||
|
||||
sqlite3_free(queryQuantized);
|
||||
diskann_candidate_list_free(&candidates);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue