mirror of
https://github.com/samvallad33/vestige.git
synced 2026-04-25 00:36:22 +02:00
Introduce 'tract' feature utilizing 'ort-tract'
This commit is contained in:
parent
2391acf480
commit
9973c163f8
9 changed files with 884 additions and 173 deletions
902
Cargo.lock
generated
902
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
12
README.md
12
README.md
|
|
@ -80,7 +80,17 @@ curl -L https://github.com/samvallad33/vestige/releases/latest/download/vestige-
|
|||
sudo mv vestige-mcp vestige vestige-restore /usr/local/bin/
|
||||
```
|
||||
|
||||
**macOS (Intel) and Windows:** Prebuilt binaries aren't currently shipped for these targets because of upstream toolchain gaps (`ort-sys` lacks Intel Mac prebuilts in the 2.0.0-rc.11 release that `fastembed 5.13.2` is pinned to; `usearch 2.24.0` hit a Windows MSVC compile break tracked as [usearch#746](https://github.com/unum-cloud/usearch/issues/746)). Both build fine from source in the meantime:
|
||||
**macOS (Intel) and Windows:** Prebuilt binaries aren't currently shipped for these targets because of upstream toolchain gaps (`ort-sys` ships no prebuilt ONNX Runtime for Intel Mac; `usearch 2.24.0` hit a Windows MSVC compile break tracked as [usearch#746](https://github.com/unum-cloud/usearch/issues/746)). Both build fine from source.
|
||||
|
||||
**macOS Intel (`x86_64-apple-darwin`):** Use the `tract` feature, which swaps the ONNX Runtime backend for [tract](https://github.com/sonos/tract) — a pure-Rust inference engine with no prebuilt binary requirement:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/samvallad33/vestige && cd vestige
|
||||
cargo build --release -p vestige-mcp --no-default-features --features tract,vector-search
|
||||
# Binary lands at target/release/vestige-mcp
|
||||
```
|
||||
|
||||
**Windows:** Build with default features from source:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/samvallad33/vestige && cd vestige
|
||||
|
|
|
|||
|
|
@ -45,6 +45,19 @@ qwen3-reranker = ["embeddings", "fastembed/qwen3"]
|
|||
# Metal GPU acceleration on Apple Silicon (significantly faster inference)
|
||||
metal = ["fastembed/metal"]
|
||||
|
||||
# Pure-Rust ONNX backend via tract — no prebuilt ORT binary, no system ORT.
|
||||
# Compiles on every Rust target, including Intel Mac (x86_64-apple-darwin).
|
||||
# Mutually exclusive with `embeddings` (download) and `ort-dynamic` (system ORT).
|
||||
# Usage: --no-default-features --features tract,vector-search,bundled-sqlite
|
||||
tract = [
|
||||
"dep:fastembed",
|
||||
"dep:ort",
|
||||
"dep:ort-tract",
|
||||
"ort/alternative-backend",
|
||||
"fastembed/hf-hub-native-tls",
|
||||
"fastembed/image-models",
|
||||
]
|
||||
|
||||
|
||||
[dependencies]
|
||||
# Serialization
|
||||
|
|
@ -86,6 +99,8 @@ notify = "8"
|
|||
# nomic-embed-text-v1.5: 768 dimensions, 8192 token context, Matryoshka support
|
||||
# v5.11: Adds Nomic v2 MoE (nomic-v2-moe feature) + Qwen3 reranker (qwen3 feature)
|
||||
fastembed = { version = "5.11", default-features = false, features = ["hf-hub-native-tls", "image-models"], optional = true }
|
||||
ort = { version = "2.0.0-rc", default-features = false, features = ["alternative-backend"], optional = true }
|
||||
ort-tract = { version = "0.3", optional = true }
|
||||
|
||||
# ============================================================================
|
||||
# OPTIONAL: Vector Search (USearch - HNSW, 20x faster than FAISS)
|
||||
|
|
|
|||
|
|
@ -79,6 +79,8 @@ fn get_model() -> Result<std::sync::MutexGuard<'static, TextEmbedding>, Embeddin
|
|||
|
||||
// nomic-embed-text-v1.5: 768 dimensions, 8192 token context
|
||||
// Matryoshka representation learning, fully open source
|
||||
crate::embeddings::ensure_tract_backend();
|
||||
|
||||
let options = InitOptions::new(EmbeddingModel::NomicEmbedTextV15)
|
||||
.with_show_download_progress(true)
|
||||
.with_cache_dir(cache_dir);
|
||||
|
|
|
|||
|
|
@ -13,6 +13,21 @@ mod code;
|
|||
mod hybrid;
|
||||
mod local;
|
||||
|
||||
/// Register the tract backend exactly once before any fastembed session is created.
|
||||
/// Under `embeddings` or `ort-dynamic` this is a no-op; only active under `tract`.
|
||||
#[cfg(feature = "tract")]
|
||||
pub(crate) fn ensure_tract_backend() {
|
||||
use std::sync::Once;
|
||||
static ONCE: Once = Once::new();
|
||||
ONCE.call_once(|| {
|
||||
ort::set_api(ort_tract::api());
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "tract"))]
|
||||
#[inline(always)]
|
||||
pub(crate) fn ensure_tract_backend() {}
|
||||
|
||||
pub(crate) use local::get_cache_dir;
|
||||
pub use local::{
|
||||
BATCH_SIZE, EMBEDDING_DIMENSIONS, Embedding, EmbeddingError, EmbeddingService, MAX_TEXT_LENGTH,
|
||||
|
|
|
|||
|
|
@ -86,8 +86,8 @@ pub mod fts;
|
|||
pub mod memory;
|
||||
pub mod storage;
|
||||
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg_attr(docsrs, doc(cfg(feature = "embeddings")))]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
#[cfg_attr(docsrs, doc(cfg(any(feature = "embeddings", feature = "tract"))))]
|
||||
pub mod embeddings;
|
||||
|
||||
#[cfg(feature = "vector-search")]
|
||||
|
|
@ -395,7 +395,7 @@ pub use neuroscience::{
|
|||
};
|
||||
|
||||
// Embeddings (when feature enabled)
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
pub use embeddings::{
|
||||
EMBEDDING_DIMENSIONS, Embedding, EmbeddingError, EmbeddingService, cosine_similarity,
|
||||
euclidean_distance,
|
||||
|
|
@ -447,7 +447,7 @@ pub mod prelude {
|
|||
NodeType, Rating, RecallInput, Result, SearchMode, Storage, StorageError,
|
||||
};
|
||||
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
pub use crate::{Embedding, EmbeddingService};
|
||||
|
||||
#[cfg(feature = "vector-search")]
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@
|
|||
//! Falls back to BM25-like term overlap scoring when the cross-encoder
|
||||
//! model is unavailable.
|
||||
|
||||
#[cfg(feature = "embeddings")]
|
||||
use crate::embeddings::get_cache_dir;
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
use crate::embeddings::{ensure_tract_backend, get_cache_dir};
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
use fastembed::{RerankInitOptions, RerankerModel, TextRerank};
|
||||
|
||||
// ============================================================================
|
||||
|
|
@ -95,7 +95,7 @@ impl Default for RerankerConfig {
|
|||
/// Falls back to BM25-like term overlap when the model is unavailable.
|
||||
pub struct Reranker {
|
||||
config: RerankerConfig,
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
cross_encoder: Option<TextRerank>,
|
||||
}
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ impl Reranker {
|
|||
pub fn new(config: RerankerConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
cross_encoder: None,
|
||||
}
|
||||
}
|
||||
|
|
@ -122,12 +122,14 @@ impl Reranker {
|
|||
///
|
||||
/// Downloads the model on first call. Call this during server startup,
|
||||
/// NOT in tests or hot paths.
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
pub fn init_cross_encoder(&mut self) {
|
||||
if self.cross_encoder.is_some() {
|
||||
return; // Already initialized
|
||||
}
|
||||
|
||||
ensure_tract_backend();
|
||||
|
||||
let options = RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn)
|
||||
.with_cache_dir(get_cache_dir())
|
||||
.with_show_download_progress(true);
|
||||
|
|
@ -145,11 +147,11 @@ impl Reranker {
|
|||
|
||||
/// Check if the cross-encoder model is available
|
||||
pub fn has_cross_encoder(&self) -> bool {
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
{
|
||||
self.cross_encoder.is_some()
|
||||
}
|
||||
#[cfg(not(feature = "embeddings"))]
|
||||
#[cfg(not(any(feature = "embeddings", feature = "tract")))]
|
||||
{
|
||||
false
|
||||
}
|
||||
|
|
@ -178,7 +180,7 @@ impl Reranker {
|
|||
let limit = top_k.unwrap_or(self.config.result_count);
|
||||
|
||||
// Try cross-encoder first
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
if let Some(ref mut model) = self.cross_encoder {
|
||||
let documents: Vec<&str> = candidates.iter().map(|(_, text)| text.as_str()).collect();
|
||||
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@
|
|||
|
||||
use chrono::{DateTime, Duration, Utc};
|
||||
use directories::ProjectDirs;
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
use lru::LruCache;
|
||||
use rusqlite::{Connection, OptionalExtension, params};
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
use std::num::NonZeroUsize;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Mutex;
|
||||
|
|
@ -20,16 +20,16 @@ use crate::fts::sanitize_fts5_query;
|
|||
use crate::memory::{
|
||||
ConsolidationResult, IngestInput, KnowledgeNode, MemoryStats, RecallInput, SearchMode,
|
||||
};
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
use crate::memory::{EmbeddingResult, MatchType, SearchResult, SimilarityResult};
|
||||
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
use crate::embeddings::{EMBEDDING_DIMENSIONS, Embedding, EmbeddingService, matryoshka_truncate};
|
||||
|
||||
#[cfg(feature = "vector-search")]
|
||||
use crate::search::{VectorIndex, linear_combination};
|
||||
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
use crate::search::hyde;
|
||||
|
||||
// ============================================================================
|
||||
|
|
@ -91,12 +91,12 @@ pub struct Storage {
|
|||
writer: Mutex<Connection>,
|
||||
reader: Mutex<Connection>,
|
||||
scheduler: Mutex<FSRSScheduler>,
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
embedding_service: EmbeddingService,
|
||||
#[cfg(feature = "vector-search")]
|
||||
vector_index: Mutex<VectorIndex>,
|
||||
/// LRU cache for query embeddings to avoid re-embedding repeated queries
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
query_cache: Mutex<LruCache<String, Vec<f32>>>,
|
||||
}
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ impl Storage {
|
|||
let reader_conn = Connection::open(&path)?;
|
||||
Self::configure_connection(&reader_conn)?;
|
||||
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
let embedding_service = EmbeddingService::new();
|
||||
|
||||
#[cfg(feature = "vector-search")]
|
||||
|
|
@ -180,7 +180,7 @@ impl Storage {
|
|||
|
||||
// Initialize LRU cache for query embeddings (capacity: 100 queries)
|
||||
// SAFETY: 100 is always non-zero, this cannot fail
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
let query_cache = Mutex::new(LruCache::new(
|
||||
NonZeroUsize::new(100).expect("100 is non-zero"),
|
||||
));
|
||||
|
|
@ -189,22 +189,22 @@ impl Storage {
|
|||
writer: Mutex::new(writer_conn),
|
||||
reader: Mutex::new(reader_conn),
|
||||
scheduler: Mutex::new(FSRSScheduler::default()),
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
embedding_service,
|
||||
#[cfg(feature = "vector-search")]
|
||||
vector_index: Mutex::new(vector_index),
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
query_cache,
|
||||
};
|
||||
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
storage.load_embeddings_into_index()?;
|
||||
|
||||
Ok(storage)
|
||||
}
|
||||
|
||||
/// Load existing embeddings into vector index
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
fn load_embeddings_into_index(&self) -> Result<()> {
|
||||
let reader = self
|
||||
.reader
|
||||
|
|
@ -324,7 +324,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
// Generate embedding if available
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
if let Err(e) = self.generate_embedding_for_node(&id, &input.content) {
|
||||
tracing::warn!("Failed to generate embedding for {}: {}", id, e);
|
||||
}
|
||||
|
|
@ -341,7 +341,7 @@ impl Storage {
|
|||
/// - Supersede a demoted/outdated memory (correction)
|
||||
///
|
||||
/// This solves the "bad vs good similar memory" problem.
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn smart_ingest(&self, input: IngestInput) -> Result<SmartIngestResult> {
|
||||
use crate::advanced::prediction_error::{
|
||||
CandidateMemory, GateDecision, PredictionErrorGate, UpdateType,
|
||||
|
|
@ -562,7 +562,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Get the embedding vector for a node
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn get_node_embedding(&self, node_id: &str) -> Result<Option<Vec<f32>>> {
|
||||
let reader = self
|
||||
.reader
|
||||
|
|
@ -580,7 +580,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Get all embedding vectors for duplicate detection
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn get_all_embeddings(&self) -> Result<Vec<(String, Vec<f32>)>> {
|
||||
let reader = self
|
||||
.reader
|
||||
|
|
@ -619,7 +619,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
// Regenerate embedding for updated content
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
{
|
||||
// Remove old embedding from index
|
||||
if let Ok(mut index) = self.vector_index.lock() {
|
||||
|
|
@ -635,7 +635,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Generate embedding for a node
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
fn generate_embedding_for_node(&self, node_id: &str, content: &str) -> Result<()> {
|
||||
if !self.embedding_service.is_ready() {
|
||||
return Ok(());
|
||||
|
|
@ -808,12 +808,12 @@ impl Storage {
|
|||
SearchMode::Keyword => {
|
||||
self.keyword_search(&input.query, input.limit, input.min_retention)?
|
||||
}
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
SearchMode::Semantic => {
|
||||
let results = self.semantic_search(&input.query, input.limit, 0.3)?;
|
||||
results.into_iter().map(|r| r.node).collect()
|
||||
}
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
SearchMode::Hybrid => {
|
||||
let results = self.hybrid_search(&input.query, input.limit, 0.3, 0.7)?;
|
||||
results.into_iter().map(|r| r.node).collect()
|
||||
|
|
@ -988,7 +988,7 @@ impl Storage {
|
|||
let _ = self.log_access(id, "search_hit");
|
||||
|
||||
// Content-aware cross-memory reinforcement: boost semantically similar neighbors
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
{
|
||||
if let Ok(Some(embedding)) = self.get_node_embedding(id) {
|
||||
let index = self
|
||||
|
|
@ -1485,7 +1485,7 @@ impl Storage {
|
|||
let rows = writer.execute("DELETE FROM knowledge_nodes WHERE id = ?1", params![id])?;
|
||||
|
||||
// Clean up vector index to prevent stale search results
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
if rows > 0
|
||||
&& let Ok(mut index) = self.vector_index.lock()
|
||||
{
|
||||
|
|
@ -1595,32 +1595,32 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Check if embedding service is ready
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
pub fn is_embedding_ready(&self) -> bool {
|
||||
self.embedding_service.is_ready()
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "embeddings"))]
|
||||
#[cfg(not(any(feature = "embeddings", feature = "tract")))]
|
||||
pub fn is_embedding_ready(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Initialize the embedding service explicitly
|
||||
/// Call this at startup to catch initialization errors early
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
pub fn init_embeddings(&self) -> Result<()> {
|
||||
self.embedding_service.init().map_err(|e| {
|
||||
StorageError::Init(format!("Embedding service initialization failed: {}", e))
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "embeddings"))]
|
||||
#[cfg(not(any(feature = "embeddings", feature = "tract")))]
|
||||
pub fn init_embeddings(&self) -> Result<()> {
|
||||
Ok(()) // No-op when neither the `embeddings` nor the `tract` feature is enabled
|
||||
}
|
||||
|
||||
/// Get query embedding from cache or compute it
|
||||
#[cfg(feature = "embeddings")]
|
||||
#[cfg(any(feature = "embeddings", feature = "tract"))]
|
||||
fn get_query_embedding(&self, query: &str) -> Result<Vec<f32>> {
|
||||
// Check cache first
|
||||
{
|
||||
|
|
@ -1652,7 +1652,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Semantic search
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn semantic_search(
|
||||
&self,
|
||||
query: &str,
|
||||
|
|
@ -1686,7 +1686,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Hybrid search (delegates to hybrid_search_filtered with no type filters)
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn hybrid_search(
|
||||
&self,
|
||||
query: &str,
|
||||
|
|
@ -1704,7 +1704,7 @@ impl Storage {
|
|||
/// `node_type` matches are excluded. `include_types` takes precedence over
|
||||
/// `exclude_types`. Both are case-sensitive and compared against the stored
|
||||
/// `node_type` value.
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn hybrid_search_filtered(
|
||||
&self,
|
||||
query: &str,
|
||||
|
|
@ -1833,7 +1833,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Keyword search returning scores, with optional type filtering in the SQL query.
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
fn keyword_search_with_scores(
|
||||
&self,
|
||||
query: &str,
|
||||
|
|
@ -1919,7 +1919,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Semantic search returning scores
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
fn semantic_search_raw(&self, query: &str, limit: i32) -> Result<Vec<(String, f32)>> {
|
||||
if !self.embedding_service.is_ready() {
|
||||
return Ok(vec![]);
|
||||
|
|
@ -1958,7 +1958,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Generate embeddings for nodes
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
pub fn generate_embeddings(
|
||||
&self,
|
||||
node_ids: Option<&[String]>,
|
||||
|
|
@ -2322,13 +2322,13 @@ impl Storage {
|
|||
}
|
||||
|
||||
// 3. Generate missing embeddings
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
let embeddings_generated = self.generate_missing_embeddings()?;
|
||||
// Fallback for builds without an embedding backend. This predicate must be the
// exact negation of the `all(any(embeddings, tract), vector-search)` gate on the
// real computation; otherwise a `tract` build compiles both `let` statements and
// this one shadows the real count with 0.
#[cfg(not(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search")))]
|
||||
let embeddings_generated = 0i64;
|
||||
|
||||
// 4. Auto-dedup: merge similar memories (episodic → semantic consolidation)
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
let duplicates_merged = self.auto_dedup_consolidation().unwrap_or(0);
|
||||
// Fallback for builds without an embedding backend. This predicate must be the
// exact negation of the `all(any(embeddings, tract), vector-search)` gate on the
// dedup pass; otherwise a `tract` build runs the dedup but this `let` shadows
// the merged count with 0.
#[cfg(not(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search")))]
|
||||
let duplicates_merged = 0i64;
|
||||
|
|
@ -2563,7 +2563,7 @@ impl Storage {
|
|||
///
|
||||
/// Finds clusters with cosine similarity > 0.85, keeps the strongest node,
|
||||
/// appends unique content from weaker nodes, and deletes duplicates.
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
fn auto_dedup_consolidation(&self) -> Result<i64> {
|
||||
let all_embeddings = self.get_all_embeddings()?;
|
||||
let n = all_embeddings.len();
|
||||
|
|
@ -2876,7 +2876,7 @@ impl Storage {
|
|||
}
|
||||
|
||||
/// Generate missing embeddings
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
fn generate_missing_embeddings(&self) -> Result<i64> {
|
||||
if !self.embedding_service.is_ready()
|
||||
&& let Err(e) = self.embedding_service.init()
|
||||
|
|
@ -4041,7 +4041,7 @@ impl Storage {
|
|||
let cutoff = (Utc::now() - Duration::days(min_age_days)).to_rfc3339();
|
||||
|
||||
// Collect IDs first for vector index cleanup
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
let doomed_ids: Vec<String> = {
|
||||
let reader = self
|
||||
.reader
|
||||
|
|
@ -4066,7 +4066,7 @@ impl Storage {
|
|||
drop(writer);
|
||||
|
||||
// Clean up vector index
|
||||
#[cfg(all(feature = "embeddings", feature = "vector-search"))]
|
||||
#[cfg(all(any(feature = "embeddings", feature = "tract"), feature = "vector-search"))]
|
||||
if deleted > 0
|
||||
&& let Ok(mut index) = self.vector_index.lock()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -16,6 +16,9 @@ vector-search = ["vestige-core/vector-search"]
|
|||
# For systems with glibc < 2.38 — use runtime-loaded ORT instead of the downloaded pre-built binary.
|
||||
# Usage: cargo install --path crates/vestige-mcp --no-default-features --features ort-dynamic,vector-search
|
||||
ort-dynamic = ["vestige-core/ort-dynamic"]
|
||||
# Pure-Rust ONNX backend — no prebuilt ORT binary. Works on Intel Mac (x86_64-apple-darwin).
|
||||
# Usage: cargo build --release -p vestige-mcp --no-default-features --features tract,vector-search,bundled-sqlite
|
||||
tract = ["vestige-core/tract"]
|
||||
|
||||
[[bin]]
|
||||
name = "vestige-mcp"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue