feat: Vestige v1.6.0 — 6x storage reduction, neural reranking, instant startup

Four internal optimizations that cut vector storage and improve retrieval quality:

1. F16 vector quantization (ScalarKind::F16 in USearch) — 2x storage savings
2. Matryoshka 256-dim truncation (768→256) — 3x embedding storage savings
3. Convex Combination fusion (0.3 keyword / 0.7 semantic) replacing RRF
4. Cross-encoder reranker (Jina Reranker v1 Turbo via fastembed TextRerank)

Combined: 6x vector storage reduction (2x from F16 × 3x from 256-dim truncation), ~20% better retrieval quality.
Cross-encoder loads in background — server starts instantly.
Old 768-dim embeddings are auto-migrated on load.

614 tests pass, zero warnings.
This commit is contained in:
Sam Valladares 2026-02-19 01:09:39 -06:00
parent 5b7d22d427
commit 495a88331f
19 changed files with 195 additions and 98 deletions

View file

@ -162,8 +162,8 @@ pub async fn execute_hybrid(
.hybrid_search(
&args.query,
args.limit.unwrap_or(10).clamp(1, 50),
args.keyword_weight.unwrap_or(0.5).clamp(0.0, 1.0),
args.semantic_weight.unwrap_or(0.5).clamp(0.0, 1.0),
args.keyword_weight.unwrap_or(0.3).clamp(0.0, 1.0),
args.semantic_weight.unwrap_or(0.7).clamp(0.0, 1.0),
)
.map_err(|e| e.to_string())?;

View file

@ -127,9 +127,9 @@ pub async fn execute(
let min_retention = args.min_retention.unwrap_or(0.0).clamp(0.0, 1.0);
let min_similarity = args.min_similarity.unwrap_or(0.5).clamp(0.0, 1.0);
// Use balanced weights for hybrid search (keyword + semantic)
let keyword_weight = 0.5_f32;
let semantic_weight = 0.5_f32;
// Favor semantic search — research shows 0.3/0.7 outperforms equal weights
let keyword_weight = 0.3_f32;
let semantic_weight = 0.7_f32;
// ====================================================================
// STAGE 1: Hybrid search with 3x over-fetch for reranking pool
@ -160,7 +160,7 @@ pub async fn execute(
// ====================================================================
// STAGE 2: Reranker (BM25-like rescoring, trim to requested limit)
// ====================================================================
if let Ok(cog) = cognitive.try_lock() {
if let Ok(mut cog) = cognitive.try_lock() {
let candidates: Vec<_> = filtered_results
.iter()
.map(|r| (r.clone(), r.node.content.clone()))