mirror of
https://github.com/samvallad33/vestige.git
synced 2026-05-09 15:52:37 +02:00
Switch embedding model from BGE to nomic-embed-text-v1.5
- Replace BGE-base-en-v1.5 with nomic-embed-text-v1.5
- 8192 token context window (vs 512 for BGE)
- Matryoshka representation learning support
- Fully open source with training data released
- Same 768 dimensions, no schema changes required

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
449d60754a
commit
5337efdfa7
5 changed files with 20 additions and 19 deletions
|
|
@@ -4,7 +4,7 @@
|
|||
//!
|
||||
//! - **FSRS-6**: 21-parameter spaced repetition (30% more efficient than SM-2)
|
||||
//! - **Dual-Strength Model**: Bjork & Bjork (1992) storage/retrieval strength
|
||||
//! - **Semantic Embeddings**: Local fastembed v5 (BGE-base-en-v1.5, 768 dimensions)
|
||||
//! - **Semantic Embeddings**: Local fastembed v5 (nomic-embed-text-v1.5, 768 dimensions)
|
||||
//! - **HNSW Vector Search**: USearch (20x faster than FAISS)
|
||||
//! - **Temporal Memory**: Bi-temporal model with validity periods
|
||||
//! - **Hybrid Search**: RRF fusion of keyword (BM25/FTS5) + semantic
|
||||
|
|
@@ -394,9 +394,9 @@ pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
|||
/// FSRS algorithm version (6 = 21 parameters)
|
||||
pub const FSRS_VERSION: u8 = 6;
|
||||
|
||||
/// Default embedding model (2026 GOD TIER: BGE-base-en-v1.5)
|
||||
/// Upgraded from all-MiniLM-L6-v2 for +30% retrieval accuracy
|
||||
pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-base-en-v1.5";
|
||||
/// Default embedding model (2026 GOD TIER: nomic-embed-text-v1.5)
|
||||
/// 8192 token context, Matryoshka support, fully open source
|
||||
pub const DEFAULT_EMBEDDING_MODEL: &str = "nomic-ai/nomic-embed-text-v1.5";
|
||||
|
||||
// ============================================================================
|
||||
// PRELUDE
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue